In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
import time

In [4]:
# Training corpus: a short essay on next-character prediction. The vocabulary
# and every (window, next character) training pair are derived from this string.
text = """Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text.

At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model.

One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks.

Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time.

Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants.

In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."""


In [5]:
# Vocabulary: each distinct character of the corpus, in sorted order.
chars = sorted(set(text))
# Lookup tables in both directions between a character and its integer id
# (the id is the character's position in `chars`).
char_to_ix = dict(zip(chars, range(len(chars))))
ix_to_char = dict(enumerate(chars))

In [6]:
def prepare_dataset(text, max_length, vocab=None):
    """Build (window, next-character) training pairs from ``text``.

    Every run of ``max_length`` consecutive characters becomes one input row,
    and the character immediately after that run becomes its label.

    Args:
        text: Source string to slice into windows.
        max_length: Number of characters per input window; must be >= 1.
        vocab: Optional mapping from character to integer id. When omitted,
            the notebook-level ``char_to_ix`` table is used.

    Returns:
        A tuple ``(X, y)`` of int64 NumPy arrays. ``X`` has shape
        ``(n, max_length)`` and ``y`` has shape ``(n,)`` where
        ``n = max(len(text) - max_length, 0)``.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
        KeyError: If ``text`` contains a character missing from the mapping.
    """
    if max_length < 1:
        raise ValueError(f"max_length must be a positive integer, got {max_length}")
    if vocab is None:
        vocab = char_to_ix

    n_samples = len(text) - max_length
    if n_samples <= 0:
        # Text too short for even one window: return correctly shaped empty
        # integer arrays rather than the shapeless float array np.array([]).
        return (
            np.empty((0, max_length), dtype=np.int64),
            np.empty((0,), dtype=np.int64),
        )

    # Encode each character once; the overlapping windows are then plain slices.
    encoded = [vocab[ch] for ch in text]
    X = np.array(
        [encoded[start:start + max_length] for start in range(n_samples)],
        dtype=np.int64,
    )
    # The label of window i is the character at position i + max_length.
    y = np.array(encoded[max_length:], dtype=np.int64)
    return X, y


In [7]:
class RNNModel(nn.Module):
    """Next-character classifier built around a vanilla ``nn.RNN``.

    Integer token ids are embedded, passed through the recurrent layer, and
    the recurrent output at the last time step is projected to
    ``output_size`` logits.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the embedding, recurrent and output layers.

        Args:
            input_size: Number of rows in the embedding table.
            hidden_size: Width of both the embedding vectors and the
                recurrent hidden state.
            output_size: Number of logits produced per sequence.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(num_embeddings=input_size, embedding_dim=hidden_size)
        # batch_first=True: the recurrent layer consumes (batch, seq, feature).
        self.rnn = nn.RNN(input_size=hidden_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return one row of logits per input sequence in ``x``."""
        step_outputs, _final_state = self.rnn(self.embedding(x))
        # Only the output at the final time step feeds the classifier head.
        last_step = step_outputs[:, -1, :]
        return self.fc(last_step)

In [8]:
class LSTMModel(nn.Module):
    """Next-character classifier built around an ``nn.LSTM``.

    Integer token ids are embedded, passed through the LSTM layer, and the
    LSTM output at the last time step is projected to ``output_size`` logits.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the embedding, LSTM and output layers.

        Args:
            input_size: Number of rows in the embedding table.
            hidden_size: Width of both the embedding vectors and the LSTM
                hidden state.
            output_size: Number of logits produced per sequence.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(num_embeddings=input_size, embedding_dim=hidden_size)
        # batch_first=True: the LSTM consumes (batch, seq, feature).
        self.lstm = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return one row of logits per input sequence in ``x``."""
        step_outputs, _final_state = self.lstm(self.embedding(x))
        # Only the output at the final time step feeds the classifier head.
        last_step = step_outputs[:, -1, :]
        return self.fc(last_step)


In [9]:
class GRUModel(nn.Module):
    """Next-character classifier built around an ``nn.GRU``.

    Integer token ids are embedded, passed through the GRU layer, and the
    GRU output at the last time step is projected to ``output_size`` logits.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the embedding, GRU and output layers.

        Args:
            input_size: Number of rows in the embedding table.
            hidden_size: Width of both the embedding vectors and the GRU
                hidden state.
            output_size: Number of logits produced per sequence.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(num_embeddings=input_size, embedding_dim=hidden_size)
        # batch_first=True: the GRU consumes (batch, seq, feature).
        self.gru = nn.GRU(input_size=hidden_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return one row of logits per input sequence in ``x``."""
        step_outputs, _final_state = self.gru(self.embedding(x))
        # Only the output at the final time step feeds the classifier head.
        last_step = step_outputs[:, -1, :]
        return self.fc(last_step)

In [28]:
def get_num_params(model):
    """Count the scalar entries of every parameter of ``model`` that requires gradients."""
    trainable_total = 0
    for param in model.parameters():
        # Parameters with requires_grad=False are excluded from the count.
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total

In [20]:
# Hyperparameters shared by the RNN, LSTM and GRU experiments.
hidden_size = 128      # passed to each model as embedding and hidden-state width
learning_rate = 5e-3   # step size given to every Adam optimizer
epochs = 100           # number of iterations of the train_and_validate loop

In [21]:
def train_and_validate(model, X_train, y_train, X_val, y_val, criterion, optimizer,
                       num_epochs=None, log_every=10):
    """Train ``model`` with full-batch updates and validate after every step.

    Each epoch performs one forward/backward pass over the whole of
    ``X_train`` followed by one optimizer step, then evaluates loss and
    accuracy on ``X_val`` with gradients disabled.

    Args:
        model: Module mapping a batch of inputs to per-class logits.
        X_train, y_train: Training inputs and integer class labels.
        X_val, y_val: Validation inputs and integer class labels.
        criterion: Loss callable applied as ``criterion(logits, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of epochs to run. Defaults to the notebook-level
            ``epochs`` setting when omitted.
        log_every: Print a progress line every this many epochs; a falsy
            value disables progress printing.

    Returns:
        dict with per-epoch lists ``"train_loss"``, ``"val_loss"`` and
        ``"val_accuracy"`` (Python floats) plus ``"execution_time"`` in
        seconds.
    """
    if num_epochs is None:
        num_epochs = epochs  # fall back to the notebook-wide setting

    history = {"train_loss": [], "val_loss": [], "val_accuracy": []}
    # perf_counter is a monotonic clock, so the measured interval cannot be
    # distorted by system clock adjustments.
    start_time = time.perf_counter()
    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        output = model(X_train)
        loss = criterion(output, y_train)
        loss.backward()
        optimizer.step()

        # Validation
        model.eval()
        with torch.no_grad():
            val_output = model(X_val)
            val_loss = criterion(val_output, y_val)
            predicted = val_output.argmax(dim=1)
            val_accuracy = (predicted == y_val).float().mean()

        train_loss_value = loss.item()
        val_loss_value = val_loss.item()
        val_accuracy_value = val_accuracy.item()
        history["train_loss"].append(train_loss_value)
        history["val_loss"].append(val_loss_value)
        history["val_accuracy"].append(val_accuracy_value)

        if log_every and (epoch + 1) % log_every == 0:
            print(f'Epoch {epoch+1}, Loss: {train_loss_value}, Validation Loss: {val_loss_value}, Validation Accuracy: {val_accuracy_value}')

    execution_time = time.perf_counter() - start_time
    print(f"Execution Time: {execution_time} seconds")
    history["execution_time"] = execution_time
    return history

In [22]:
# 10-character windows: hold out 20% for validation (fixed random_state)
# and convert all four splits to int64 tensors.
X_10, y_10 = prepare_dataset(text, 10)
X_train_10, X_val_10, y_train_10, y_val_10 = (
    torch.tensor(split, dtype=torch.long)
    for split in train_test_split(X_10, y_10, test_size=0.2, random_state=42)
)


In [23]:
# 20-character windows: hold out 20% for validation (fixed random_state)
# and convert all four splits to int64 tensors.
X_20, y_20 = prepare_dataset(text, 20)
X_train_20, X_val_20, y_train_20, y_val_20 = (
    torch.tensor(split, dtype=torch.long)
    for split in train_test_split(X_20, y_20, test_size=0.2, random_state=42)
)


In [24]:
# 30-character windows: hold out 20% for validation (fixed random_state)
# and convert all four splits to int64 tensors.
X_30, y_30 = prepare_dataset(text, 30)
X_train_30, X_val_30, y_train_30, y_val_30 = (
    torch.tensor(split, dtype=torch.long)
    for split in train_test_split(X_30, y_30, test_size=0.2, random_state=42)
)


In [46]:
# One instance per architecture, built in RNN -> LSTM -> GRU order; the
# vocabulary size is used as both the input and the output dimension.
rnn_model, lstm_model, gru_model = (
    architecture(len(chars), hidden_size, len(chars))
    for architecture in (RNNModel, LSTMModel, GRUModel)
)

In [47]:
# A single cross-entropy loss is shared by all runs; each model gets its own
# Adam optimizer configured with the common learning rate.
criterion = nn.CrossEntropyLoss()
rnn_optimizer, lstm_optimizer, gru_optimizer = (
    optim.Adam(network.parameters(), lr=learning_rate)
    for network in (rnn_model, lstm_model, gru_model)
)

In [32]:
# Vanilla RNN on the 10-character windows, followed by its trainable-parameter count.
print("RNN Model:")
train_and_validate(
    model=rnn_model,
    X_train=X_train_10,
    y_train=y_train_10,
    X_val=X_val_10,
    y_val=y_val_10,
    criterion=criterion,
    optimizer=rnn_optimizer,
)
print(get_num_params(rnn_model))

RNN Model:
Epoch 10, Loss: 2.234295129776001, Validation Loss: 2.307642698287964, Validation Accuracy: 0.3899371027946472
Epoch 20, Loss: 1.7504087686538696, Validation Loss: 2.091737985610962, Validation Accuracy: 0.4234800934791565
Epoch 30, Loss: 1.3947967290878296, Validation Loss: 1.9954954385757446, Validation Accuracy: 0.4633123576641083
Epoch 40, Loss: 1.0721142292022705, Validation Loss: 1.9657237529754639, Validation Accuracy: 0.4800838530063629
Epoch 50, Loss: 0.7760659456253052, Validation Loss: 2.028057813644409, Validation Accuracy: 0.5031446814537048
Epoch 60, Loss: 0.5163461565971375, Validation Loss: 2.1652650833129883, Validation Accuracy: 0.50104820728302
Epoch 70, Loss: 0.3298380672931671, Validation Loss: 2.301595449447632, Validation Accuracy: 0.49895179271698
Epoch 80, Loss: 0.19547824561595917, Validation Loss: 2.4598724842071533, Validation Accuracy: 0.5073375105857849
Epoch 90, Loss: 0.11843431740999222, Validation Loss: 2.634629487991333, Validation Accuracy:

In [35]:
# Re-create the model and its optimizer so this 20-character run starts from
# fresh weights and fresh Adam state; otherwise it would merely continue
# training whatever an earlier cell left in `rnn_model`, making the runs for
# different window lengths incomparable.
rnn_model = RNNModel(len(chars), hidden_size, len(chars))
rnn_optimizer = optim.Adam(rnn_model.parameters(), lr=learning_rate)
train_and_validate(rnn_model, X_train_20, y_train_20, X_val_20, y_val_20, criterion, rnn_optimizer)
print(get_num_params(rnn_model))

Epoch 10, Loss: 2.2369792461395264, Validation Loss: 2.418715238571167, Validation Accuracy: 0.3389473557472229
Epoch 20, Loss: 1.7649403810501099, Validation Loss: 2.1914923191070557, Validation Accuracy: 0.4294736981391907
Epoch 30, Loss: 1.4170124530792236, Validation Loss: 2.077091693878174, Validation Accuracy: 0.4589473605155945
Epoch 40, Loss: 1.1137441396713257, Validation Loss: 2.0510611534118652, Validation Accuracy: 0.4863157868385315
Epoch 50, Loss: 0.8362480401992798, Validation Loss: 2.100748062133789, Validation Accuracy: 0.5073684453964233
Epoch 60, Loss: 0.5927646160125732, Validation Loss: 2.2230286598205566, Validation Accuracy: 0.5010526180267334
Epoch 70, Loss: 0.38871920108795166, Validation Loss: 2.2948532104492188, Validation Accuracy: 0.5178947448730469
Epoch 80, Loss: 0.24208442866802216, Validation Loss: 2.4266178607940674, Validation Accuracy: 0.5073684453964233
Epoch 90, Loss: 0.1505090743303299, Validation Loss: 2.543039560317993, Validation Accuracy: 0.50

In [41]:
# Re-create the model and its optimizer so this 30-character run starts from
# fresh weights and fresh Adam state; otherwise it would merely continue
# training whatever an earlier cell left in `rnn_model`, making the runs for
# different window lengths incomparable.
rnn_model = RNNModel(len(chars), hidden_size, len(chars))
rnn_optimizer = optim.Adam(rnn_model.parameters(), lr=learning_rate)
train_and_validate(rnn_model, X_train_30, y_train_30, X_val_30, y_val_30, criterion, rnn_optimizer)
print(get_num_params(rnn_model))

Epoch 10, Loss: 2.2394797801971436, Validation Loss: 2.4579646587371826, Validation Accuracy: 0.32135307788848877
Epoch 20, Loss: 1.7679774761199951, Validation Loss: 2.2606613636016846, Validation Accuracy: 0.3636363744735718
Epoch 30, Loss: 1.410268783569336, Validation Loss: 2.1653339862823486, Validation Accuracy: 0.41437631845474243
Epoch 40, Loss: 1.1049487590789795, Validation Loss: 2.1429476737976074, Validation Accuracy: 0.4482029676437378
Epoch 50, Loss: 0.8304303884506226, Validation Loss: 2.1460483074188232, Validation Accuracy: 0.47145876288414
Epoch 60, Loss: 0.5984236001968384, Validation Loss: 2.192965507507324, Validation Accuracy: 0.486257940530777
Epoch 70, Loss: 0.40031686425209045, Validation Loss: 2.3224353790283203, Validation Accuracy: 0.47991544008255005
Epoch 80, Loss: 0.2601189911365509, Validation Loss: 2.5039961338043213, Validation Accuracy: 0.47780126333236694
Epoch 90, Loss: 0.16624568402767181, Validation Loss: 2.6815297603607178, Validation Accuracy: 0

In [42]:
# LSTM on the 10-character windows, followed by its trainable-parameter count.
print("LSTM Model:")
train_and_validate(
    model=lstm_model,
    X_train=X_train_10,
    y_train=y_train_10,
    X_val=X_val_10,
    y_val=y_val_10,
    criterion=criterion,
    optimizer=lstm_optimizer,
)
print(get_num_params(lstm_model))

LSTM Model:
Epoch 10, Loss: 2.536839008331299, Validation Loss: 2.4927024841308594, Validation Accuracy: 0.33752620220184326
Epoch 20, Loss: 2.030027151107788, Validation Loss: 2.1998746395111084, Validation Accuracy: 0.4088050425052643
Epoch 30, Loss: 1.638524055480957, Validation Loss: 2.0475406646728516, Validation Accuracy: 0.42557650804519653
Epoch 40, Loss: 1.2863359451293945, Validation Loss: 1.9717531204223633, Validation Accuracy: 0.4633123576641083
Epoch 50, Loss: 0.9557944536209106, Validation Loss: 1.9529662132263184, Validation Accuracy: 0.5031446814537048
Epoch 60, Loss: 0.6617252826690674, Validation Loss: 2.000004291534424, Validation Accuracy: 0.5073375105857849
Epoch 70, Loss: 0.42380449175834656, Validation Loss: 2.109052896499634, Validation Accuracy: 0.4716981053352356
Epoch 80, Loss: 0.25237342715263367, Validation Loss: 2.241546630859375, Validation Accuracy: 0.46960169076919556
Epoch 90, Loss: 0.14868901669979095, Validation Loss: 2.3840861320495605, Validation 

In [45]:
# Re-create the model and its optimizer so this 20-character run starts from
# fresh weights and fresh Adam state; otherwise it would merely continue
# training whatever an earlier cell left in `lstm_model`, making the runs for
# different window lengths incomparable.
lstm_model = LSTMModel(len(chars), hidden_size, len(chars))
lstm_optimizer = optim.Adam(lstm_model.parameters(), lr=learning_rate)
print("LSTM Model:")
train_and_validate(lstm_model, X_train_20, y_train_20, X_val_20, y_val_20, criterion, lstm_optimizer)
print(get_num_params(lstm_model))

LSTM Model:
Epoch 10, Loss: 2.5682215690612793, Validation Loss: 2.6345999240875244, Validation Accuracy: 0.2757894694805145
Epoch 20, Loss: 2.0848042964935303, Validation Loss: 2.3194386959075928, Validation Accuracy: 0.35789474844932556
Epoch 30, Loss: 1.7118844985961914, Validation Loss: 2.14076566696167, Validation Accuracy: 0.4231579005718231
Epoch 40, Loss: 1.3764475584030151, Validation Loss: 2.032195568084717, Validation Accuracy: 0.4694736897945404
Epoch 50, Loss: 1.0752917528152466, Validation Loss: 2.0009572505950928, Validation Accuracy: 0.4989473819732666
Epoch 60, Loss: 0.8084864616394043, Validation Loss: 2.0023958683013916, Validation Accuracy: 0.5052631497383118
Epoch 70, Loss: 0.583466112613678, Validation Loss: 2.0613644123077393, Validation Accuracy: 0.49052631855010986
Epoch 80, Loss: 0.4161577820777893, Validation Loss: 2.1673924922943115, Validation Accuracy: 0.4989473819732666
Epoch 90, Loss: 0.26170071959495544, Validation Loss: 2.2511367797851562, Validation A

In [48]:
# Re-create the model and its optimizer so this 30-character run starts from
# fresh weights and fresh Adam state; otherwise it would merely continue
# training whatever an earlier cell left in `lstm_model`, making the runs for
# different window lengths incomparable.
lstm_model = LSTMModel(len(chars), hidden_size, len(chars))
lstm_optimizer = optim.Adam(lstm_model.parameters(), lr=learning_rate)
print("LSTM Model:")
train_and_validate(lstm_model, X_train_30, y_train_30, X_val_30, y_val_30, criterion, lstm_optimizer)
print(get_num_params(lstm_model))

LSTM Model:
Epoch 10, Loss: 2.571023941040039, Validation Loss: 2.6458563804626465, Validation Accuracy: 0.2452431321144104
Epoch 20, Loss: 2.0687496662139893, Validation Loss: 2.3261616230010986, Validation Accuracy: 0.35517969727516174
Epoch 30, Loss: 1.6803110837936401, Validation Loss: 2.16711163520813, Validation Accuracy: 0.41437631845474243
Epoch 40, Loss: 1.3332363367080688, Validation Loss: 2.0986077785491943, Validation Accuracy: 0.4334038197994232
Epoch 50, Loss: 1.0286128520965576, Validation Loss: 2.083016872406006, Validation Accuracy: 0.4693446159362793
Epoch 60, Loss: 0.7529832124710083, Validation Loss: 2.104940414428711, Validation Accuracy: 0.47145876288414
Epoch 70, Loss: 0.5427717566490173, Validation Loss: 2.1569390296936035, Validation Accuracy: 0.4693446159362793
Epoch 80, Loss: 0.3540922701358795, Validation Loss: 2.2283902168273926, Validation Accuracy: 0.4756871163845062
Epoch 90, Loss: 0.22776061296463013, Validation Loss: 2.315833806991577, Validation Accur