<a href="https://colab.research.google.com/github/karrin10/Introduction-to-Deep-Learning/blob/Homework-3/hw3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>



> Kathleen Arrington

> Homework 3



In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
import time

In [2]:
# Problem 1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

# RNN Sequences of 10 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
# Sample text
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."

# Creating character vocab
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Setting up the dataset
max_length = 10  # Max length of input sequences

X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset (training and validation sets)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Def the RNN model
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size)

input_size = len(chars)
hidden_size = 128
output_size = len(chars)
learning_rate = 0.005
epochs = 100

model = CharRNN(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

start_time = time.time()

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    hidden = model.init_hidden(X_train.size(0))
    output, hidden = model(X_train, hidden)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        hidden_val = model.init_hidden(X_val.size(0))
        val_output, _ = model(X_val, hidden_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

Epoch 10, Loss: 2.2385051250457764, Validation Loss: 2.2641918659210205, Validation Accuracy: 0.39915966987609863
Epoch 20, Loss: 1.7778737545013428, Validation Loss: 2.0297012329101562, Validation Accuracy: 0.4600840210914612
Epoch 30, Loss: 1.4269391298294067, Validation Loss: 1.8997403383255005, Validation Accuracy: 0.5084033608436584
Epoch 40, Loss: 1.1122448444366455, Validation Loss: 1.8602596521377563, Validation Accuracy: 0.4957983195781708
Epoch 50, Loss: 0.8232355117797852, Validation Loss: 1.8842140436172485, Validation Accuracy: 0.5189075469970703
Epoch 60, Loss: 0.581701934337616, Validation Loss: 1.9899318218231201, Validation Accuracy: 0.5399159789085388
Epoch 70, Loss: 0.3758786618709564, Validation Loss: 2.137108087539673, Validation Accuracy: 0.5315126180648804
Epoch 80, Loss: 0.23257926106452942, Validation Loss: 2.283123254776001, Validation Accuracy: 0.5189075469970703
Epoch 90, Loss: 0.1430366188287735, Validation Loss: 2.4341025352478027, Validation Accuracy: 0.5

In [3]:
# RNN Sequences of 20 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
# Sample text
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."

# Creating character vocab
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Setting up the dataset
max_length = 20  # Max length of input sequences

X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset (training and validation sets)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Def the RNN model
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size)

input_size = len(chars)
hidden_size = 128
output_size = len(chars)
learning_rate = 0.005
epochs = 100

model = CharRNN(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

start_time = time.time()

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    hidden = model.init_hidden(X_train.size(0))
    output, hidden = model(X_train, hidden)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        hidden_val = model.init_hidden(X_val.size(0))
        val_output, _ = model(X_val, hidden_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

Epoch 10, Loss: 2.2391889095306396, Validation Loss: 2.2719666957855225, Validation Accuracy: 0.3860759437084198
Epoch 20, Loss: 1.7748866081237793, Validation Loss: 2.036180257797241, Validation Accuracy: 0.4641350209712982
Epoch 30, Loss: 1.41562819480896, Validation Loss: 1.9116483926773071, Validation Accuracy: 0.49367088079452515
Epoch 40, Loss: 1.1008833646774292, Validation Loss: 1.8634835481643677, Validation Accuracy: 0.5168776512145996
Epoch 50, Loss: 0.8133541941642761, Validation Loss: 1.870115041732788, Validation Accuracy: 0.5379746556282043
Epoch 60, Loss: 0.5663217902183533, Validation Loss: 1.9256782531738281, Validation Accuracy: 0.550632894039154
Epoch 70, Loss: 0.3931909203529358, Validation Loss: 2.0701310634613037, Validation Accuracy: 0.5400843620300293
Epoch 80, Loss: 0.2486284226179123, Validation Loss: 2.183757781982422, Validation Accuracy: 0.5400843620300293
Epoch 90, Loss: 0.16088984906673431, Validation Loss: 2.3269762992858887, Validation Accuracy: 0.5400

In [4]:
# RNN Sequences of 30 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
# Sample text
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."

# Creating character vocab
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Setting up the dataset
max_length = 30  # Max length of input sequences

X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset (training and validation sets)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Def the RNN model
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size)

input_size = len(chars)
hidden_size = 128
output_size = len(chars)
learning_rate = 0.005
epochs = 100

model = CharRNN(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

start_time = time.time()

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    hidden = model.init_hidden(X_train.size(0))
    output, hidden = model(X_train, hidden)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        hidden_val = model.init_hidden(X_val.size(0))
        val_output, _ = model(X_val, hidden_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

Epoch 10, Loss: 2.2644314765930176, Validation Loss: 2.4157583713531494, Validation Accuracy: 0.33474576473236084
Epoch 20, Loss: 1.80393385887146, Validation Loss: 2.1750295162200928, Validation Accuracy: 0.4194915294647217
Epoch 30, Loss: 1.454672932624817, Validation Loss: 2.0556657314300537, Validation Accuracy: 0.45974576473236084
Epoch 40, Loss: 1.143332839012146, Validation Loss: 2.028075933456421, Validation Accuracy: 0.46398305892944336
Epoch 50, Loss: 0.8544238805770874, Validation Loss: 2.0476014614105225, Validation Accuracy: 0.4936440587043762
Epoch 60, Loss: 0.6109963655471802, Validation Loss: 2.108325719833374, Validation Accuracy: 0.5042372941970825
Epoch 70, Loss: 0.4173904061317444, Validation Loss: 2.228640556335449, Validation Accuracy: 0.5042372941970825
Epoch 80, Loss: 0.2720876634120941, Validation Loss: 2.345839500427246, Validation Accuracy: 0.508474588394165
Epoch 90, Loss: 0.17956393957138062, Validation Loss: 2.4985969066619873, Validation Accuracy: 0.49364

In [5]:
# LSTM Sequences of 10 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
# Sample text
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."

# Creating character vocab
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Setting uo the dataset
max_length = 10  # Max length of input sequences

X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset (training and validation sets)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Def the LSTM model
class CharLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        # Initialize: hidden state and cell state
        return (torch.zeros(1, batch_size, self.hidden_size), torch.zeros(1, batch_size, self.hidden_size))

input_size = len(chars)
hidden_size = 128
output_size = len(chars)
learning_rate = 0.005
epochs = 100

model = CharLSTM(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

start_time = time.time()

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    hidden = model.init_hidden(X_train.size(0))
    output, hidden = model(X_train, hidden)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        hidden_val = model.init_hidden(X_val.size(0))
        val_output, _ = model(X_val, hidden_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

Epoch 10, Loss: 2.55305814743042, Validation Loss: 2.4951603412628174, Validation Accuracy: 0.3361344635486603
Epoch 20, Loss: 2.04360294342041, Validation Loss: 2.1711132526397705, Validation Accuracy: 0.4285714328289032
Epoch 30, Loss: 1.637508749961853, Validation Loss: 2.0144214630126953, Validation Accuracy: 0.46848738193511963
Epoch 40, Loss: 1.2671133279800415, Validation Loss: 1.9287827014923096, Validation Accuracy: 0.5042017102241516
Epoch 50, Loss: 0.9268880486488342, Validation Loss: 1.9067836999893188, Validation Accuracy: 0.5252100825309753
Epoch 60, Loss: 0.6294106841087341, Validation Loss: 1.9632166624069214, Validation Accuracy: 0.5336134433746338
Epoch 70, Loss: 0.40348073840141296, Validation Loss: 2.085613489151001, Validation Accuracy: 0.5168067216873169
Epoch 80, Loss: 0.2419726699590683, Validation Loss: 2.231685161590576, Validation Accuracy: 0.4978991448879242
Epoch 90, Loss: 0.1488340049982071, Validation Loss: 2.3516738414764404, Validation Accuracy: 0.49579

In [6]:
# LSTM Sequences of 20 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
# Sample text
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."

# Creating character vocab
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Setting up the dataset
max_length = 20  # Max length of input sequences

X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset (training and validation sets)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Def the LSTM model
class CharLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        # Initialize: hidden state and cell state
        return (torch.zeros(1, batch_size, self.hidden_size), torch.zeros(1, batch_size, self.hidden_size))


input_size = len(chars)
hidden_size = 128
output_size = len(chars)
learning_rate = 0.005
epochs = 100

model = CharLSTM(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

start_time = time.time()

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    hidden = model.init_hidden(X_train.size(0))
    output, hidden = model(X_train, hidden)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        hidden_val = model.init_hidden(X_val.size(0))
        val_output, _ = model(X_val, hidden_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

Epoch 10, Loss: 2.5637025833129883, Validation Loss: 2.5468428134918213, Validation Accuracy: 0.2890295386314392
Epoch 20, Loss: 2.081737756729126, Validation Loss: 2.191842794418335, Validation Accuracy: 0.40506330132484436
Epoch 30, Loss: 1.708567500114441, Validation Loss: 2.008366823196411, Validation Accuracy: 0.4641350209712982
Epoch 40, Loss: 1.3617706298828125, Validation Loss: 1.8971798419952393, Validation Accuracy: 0.5189873576164246
Epoch 50, Loss: 1.0382091999053955, Validation Loss: 1.848049521446228, Validation Accuracy: 0.5379746556282043
Epoch 60, Loss: 0.7606589794158936, Validation Loss: 1.8456950187683105, Validation Accuracy: 0.552742600440979
Epoch 70, Loss: 0.5331844091415405, Validation Loss: 1.8813660144805908, Validation Accuracy: 0.5421940684318542
Epoch 80, Loss: 0.4185914993286133, Validation Loss: 1.9653831720352173, Validation Accuracy: 0.550632894039154
Epoch 90, Loss: 0.251871794462204, Validation Loss: 2.050791025161743, Validation Accuracy: 0.53586494

In [7]:
# LSTM Sequences of 30 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
# Sample text
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."

# Creating character vocab
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Setting up the dataset
max_length = 30  # Max length of input sequences

X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset (training and validation sets)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Def the LSTM model
class CharLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        # Initialize: hidden state and cell state
        return (torch.zeros(1, batch_size, self.hidden_size), torch.zeros(1, batch_size, self.hidden_size))

input_size = len(chars)
hidden_size = 128
output_size = len(chars)
learning_rate = 0.005
epochs = 100

model = CharLSTM(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

start_time = time.time()

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    hidden = model.init_hidden(X_train.size(0))
    output, hidden = model(X_train, hidden)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        hidden_val = model.init_hidden(X_val.size(0))
        val_output, _ = model(X_val, hidden_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

Epoch 10, Loss: 2.5027964115142822, Validation Loss: 2.5219295024871826, Validation Accuracy: 0.3156779706478119
Epoch 20, Loss: 2.0016462802886963, Validation Loss: 2.2008767127990723, Validation Accuracy: 0.39406779408454895
Epoch 30, Loss: 1.620553970336914, Validation Loss: 2.0302860736846924, Validation Accuracy: 0.45974576473236084
Epoch 40, Loss: 1.2732549905776978, Validation Loss: 1.9425373077392578, Validation Accuracy: 0.4957627058029175
Epoch 50, Loss: 0.9534385800361633, Validation Loss: 1.9174495935440063, Validation Accuracy: 0.4957627058029175
Epoch 60, Loss: 0.6856467127799988, Validation Loss: 1.9484739303588867, Validation Accuracy: 0.5063559412956238
Epoch 70, Loss: 0.47187209129333496, Validation Loss: 2.0083186626434326, Validation Accuracy: 0.5148305296897888
Epoch 80, Loss: 0.3199961185455322, Validation Loss: 2.107548475265503, Validation Accuracy: 0.5021186470985413
Epoch 90, Loss: 0.20401495695114136, Validation Loss: 2.1935341358184814, Validation Accuracy: 

In [8]:
# GRU Sequences of 10 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
# Sample text
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."

# Creating character vocab
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Setting up the dataset
max_length = 10  # Max length of input sequences

X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset (training and validation sets)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Def the GRU model
class CharGRU(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size)

input_size = len(chars)
hidden_size = 128
output_size = len(chars)
learning_rate = 0.005
epochs = 100

model = CharGRU(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

start_time = time.time()

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    hidden = model.init_hidden(X_train.size(0))
    output, hidden = model(X_train, hidden)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        hidden_val = model.init_hidden(X_val.size(0))
        val_output, _ = model(X_val, hidden_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

Epoch 10, Loss: 2.364985704421997, Validation Loss: 2.3549914360046387, Validation Accuracy: 0.3613445460796356
Epoch 20, Loss: 1.8426392078399658, Validation Loss: 2.064180612564087, Validation Accuracy: 0.4306722581386566
Epoch 30, Loss: 1.4264247417449951, Validation Loss: 1.9003922939300537, Validation Accuracy: 0.47058823704719543
Epoch 40, Loss: 1.0535825490951538, Validation Loss: 1.8541673421859741, Validation Accuracy: 0.5042017102241516
Epoch 50, Loss: 0.7234082818031311, Validation Loss: 1.8814176321029663, Validation Accuracy: 0.5252100825309753
Epoch 60, Loss: 0.45353230834007263, Validation Loss: 1.9857579469680786, Validation Accuracy: 0.529411792755127
Epoch 70, Loss: 0.26054736971855164, Validation Loss: 2.128835678100586, Validation Accuracy: 0.5252100825309753
Epoch 80, Loss: 0.1438896358013153, Validation Loss: 2.264399528503418, Validation Accuracy: 0.5231092572212219
Epoch 90, Loss: 0.08990851789712906, Validation Loss: 2.3765430450439453, Validation Accuracy: 0.5

In [9]:
# GRU Sequences of 20 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
# Sample text
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."

# Creating character vocab
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Setting up the dataset
max_length = 20  # Max length of input sequences

X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset (training and validation sets)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Def the GRU model
class CharGRU(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size)

input_size = len(chars)
hidden_size = 128
output_size = len(chars)
learning_rate = 0.005
epochs = 100

model = CharGRU(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

start_time = time.time()

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    hidden = model.init_hidden(X_train.size(0))
    output, hidden = model(X_train, hidden)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        hidden_val = model.init_hidden(X_val.size(0))
        val_output, _ = model(X_val, hidden_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

Epoch 10, Loss: 2.415750026702881, Validation Loss: 2.420889377593994, Validation Accuracy: 0.3291139304637909
Epoch 20, Loss: 1.911820888519287, Validation Loss: 2.099954843521118, Validation Accuracy: 0.4345991611480713
Epoch 30, Loss: 1.5194735527038574, Validation Loss: 1.9341320991516113, Validation Accuracy: 0.49367088079452515
Epoch 40, Loss: 1.1597490310668945, Validation Loss: 1.8580251932144165, Validation Accuracy: 0.5189873576164246
Epoch 50, Loss: 0.8310125470161438, Validation Loss: 1.8433629274368286, Validation Accuracy: 0.5358649492263794
Epoch 60, Loss: 0.5492903590202332, Validation Loss: 1.883860468864441, Validation Accuracy: 0.5464134812355042
Epoch 70, Loss: 0.3352445960044861, Validation Loss: 1.993269443511963, Validation Accuracy: 0.5358649492263794
Epoch 80, Loss: 0.19381612539291382, Validation Loss: 2.155100107192993, Validation Accuracy: 0.5316455960273743
Epoch 90, Loss: 0.11297536641359329, Validation Loss: 2.30226469039917, Validation Accuracy: 0.529535

In [10]:
# GRU Sequences of 30 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
# Sample text
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."

# Creating character vocab
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Setting up the dataset
max_length = 30  # Max length of input sequences

X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset (training and validation sets)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Def the GRU model
class CharGRU(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size)

input_size = len(chars)
hidden_size = 128
output_size = len(chars)
learning_rate = 0.005
epochs = 100

model = CharGRU(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

start_time = time.time()

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    hidden = model.init_hidden(X_train.size(0))
    output, hidden = model(X_train, hidden)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        hidden_val = model.init_hidden(X_val.size(0))
        val_output, _ = model(X_val, hidden_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

Epoch 10, Loss: 2.38531756401062, Validation Loss: 2.4821066856384277, Validation Accuracy: 0.3050847351551056
Epoch 20, Loss: 1.8851953744888306, Validation Loss: 2.1571004390716553, Validation Accuracy: 0.402542382478714
Epoch 30, Loss: 1.4832574129104614, Validation Loss: 1.994583249092102, Validation Accuracy: 0.4406779706478119
Epoch 40, Loss: 1.1179853677749634, Validation Loss: 1.9448225498199463, Validation Accuracy: 0.48093220591545105
Epoch 50, Loss: 0.7796461582183838, Validation Loss: 1.9509553909301758, Validation Accuracy: 0.5063559412956238
Epoch 60, Loss: 0.49570509791374207, Validation Loss: 2.0373952388763428, Validation Accuracy: 0.5063559412956238
Epoch 70, Loss: 0.29864344000816345, Validation Loss: 2.1821956634521484, Validation Accuracy: 0.4957627058029175
Epoch 80, Loss: 0.16667971014976501, Validation Loss: 2.3237035274505615, Validation Accuracy: 0.5
Epoch 90, Loss: 0.09452111274003983, Validation Loss: 2.4657490253448486, Validation Accuracy: 0.49152541160583