### Import the Libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import re
import json
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from collections import defaultdict

### Helpers to Load the Tokenizer (Vocabulary) and the Model

In [2]:
def load_tokenizer(vocab_path):
    """Build a Keras ``Tokenizer`` whose vocabulary is read from a JSON file.

    Args:
        vocab_path: Path to a JSON file. Its decoded content is assigned
            unchanged to ``tokenizer.word_index``.

    Returns:
        A new ``Tokenizer`` instance with ``word_index`` set to the loaded data.
    """
    # JSON text is UTF-8; state it explicitly so vocabulary entries with
    # non-ASCII characters decode identically on every platform instead of
    # depending on the locale's default encoding.
    with open(vocab_path, 'r', encoding='utf-8') as f:
        word_index = json.load(f)
    tokenizer = Tokenizer()
    tokenizer.word_index = word_index
    return tokenizer

def load_model(model_path):
    """Load a fully pickled PyTorch model and put it in evaluation mode.

    Args:
        model_path: Path to a file produced by ``torch.save(model, path)``.
            The class of the pickled model must already be defined or
            importable when this function is called.

    Returns:
        The deserialized model, mapped onto the CPU, with ``eval()`` applied.
    """
    # SECURITY: weights_only=False unpickles arbitrary Python objects, so only
    # load checkpoints from a trusted source. It is passed explicitly because
    # recent PyTorch releases default to weights_only=True, which refuses to
    # load a whole pickled nn.Module.
    # map_location="cpu" lets a checkpoint saved on a GPU load on a CPU-only
    # runtime and keeps the model on the CPU.
    model = torch.load(model_path, map_location="cpu", weights_only=False)
    model.eval()  # Set the model in the evaluation mode
    return model


### Function to Remove Punctuation

In [3]:
def preprocess_text(text):
    """Strip punctuation from ``text`` and trim surrounding whitespace.

    Every character that is neither a word character (``\\w``) nor whitespace
    (``\\s``) is removed; leading and trailing whitespace is then dropped.
    """
    punctuation = re.compile(r'[^\w\s]')
    without_punctuation = punctuation.sub('', text)
    return without_punctuation.strip()

### Model Definition

In [4]:
class Next_Word_Predictor(nn.Module):
    """Embedding -> single-layer bidirectional LSTM -> linear scoring layer.

    Args:
        num_classes: Size of both the embedding table and the output layer.
        embedding_dim: Width of each embedding vector.
        lstm_units: Hidden size of the LSTM per direction.
        dropout_prob: Probability given to ``self.dropout``.
    """

    def __init__(self, num_classes, embedding_dim=100, lstm_units=150,dropout_prob=0.5):
        super().__init__()
        self.embedding = nn.Embedding(num_classes, embedding_dim)
        # No ``dropout=`` argument here: nn.LSTM only applies dropout between
        # stacked layers, so on this single-layer LSTM it had no effect and
        # merely raised a UserWarning on construction. Parameters and outputs
        # are unchanged by dropping it.
        self.lstm = nn.LSTM(embedding_dim, lstm_units, batch_first=True, bidirectional=True)
        # Both directions are concatenated, hence 2 * lstm_units input features.
        self.fc = nn.Linear(lstm_units * 2, num_classes)
        # NOTE(review): this layer is created but never applied in forward().
        # It is kept so the module's attributes stay as before; confirm whether
        # it was meant to be applied before ``self.fc`` during training.
        self.dropout = nn.Dropout(dropout_prob)

    def forward(self, x):
        """Return one score per class for each sequence in the batch.

        Args:
            x: Integer index tensor fed to the embedding; the LSTM is
                batch-first, so dimension 0 is the batch and 1 the time step.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with unnormalized scores.
        """
        x = self.embedding(x)
        x, _ = self.lstm(x)
        # Keep only the LSTM output at the last time step of every sequence.
        x = x[:, -1, :]
        x = self.fc(x)
        return x

### Method to Predict the Next Word

In [5]:
# Function to predict next words
def predict_next_words(text, tokenizer, model, max_sequence_length, top_k=5):
    """Return the ``top_k`` highest-scoring next-word candidates for ``text``.

    Args:
        text: Raw input text; punctuation is removed via ``preprocess_text``.
        tokenizer: Object providing ``texts_to_sequences`` and ``word_index``
            (a word -> index mapping).
        model: Callable that maps a ``(1, max_sequence_length)`` long tensor to
            a tensor of scores whose first row holds one score per index.
        max_sequence_length: Length the index sequence is pre-padded to.
        top_k: Maximum number of candidates to return. Values larger than the
            number of scores are clamped to that number.

    Returns:
        A list of words ordered from highest to lowest score. Indices missing
        from ``tokenizer.word_index`` are reported as ``"Unknown"``. If no word
        of ``text`` is known to the tokenizer, ``["No prediction available"]``.
    """
    # Preprocess the text
    preprocessed_text = preprocess_text(text)

    # Convert text to sequence of indices
    sequence = tokenizer.texts_to_sequences([preprocessed_text])[0]

    if not sequence:
        return ["No prediction available"]

    # Left-pad so the most recent words sit at the end of the sequence.
    padded = pad_sequences([sequence], maxlen=max_sequence_length, padding='pre')

    # Convert to tensor
    input_sequence = torch.tensor(padded, dtype=torch.long)

    # Make the prediction
    with torch.no_grad():
        output = model(input_sequence)

    # Select the single batch row by index instead of squeeze(): with top_k=1,
    # squeeze() collapsed the result to a scalar and .tolist() returned an int,
    # which then could not be iterated.
    scores = output[0]

    # torch.topk raises when k exceeds the number of scores, so clamp it.
    k = min(top_k, scores.size(-1))
    top_k_indices = torch.topk(scores, k).indices.tolist()

    # Reverse lookup for the predicted words
    index_word = {index: word for word, index in tokenizer.word_index.items()}
    return [index_word.get(idx, "Unknown") for idx in top_k_indices]


### Testing

In [7]:
# Locations of the saved vocabulary and of the pickled model.
VOCAB_PATH = '/content/vocabulary.json'
MODEL_PATH = '/content/next_word_predictor.pth'

# Restore the tokenizer and the trained network from disk.
tokenizer = load_tokenizer(VOCAB_PATH)
model = load_model(MODEL_PATH)

# Padding length; it is meant to equal the value used during training.
# NOTE(review): Predict_Next further down pads to 17, not 19 -- confirm which
# value matches training and keep the two in sync.
max_sequence_length = 19

# Example usage (disabled):
# input_text = "The quick brown"
# predicted_words = predict_next_words(input_text, tokenizer, model, max_sequence_length, top_k=5)
# print(f"Predicted next words for '{input_text}': {predicted_words}")

  model=torch.load(model_path)


### Predictor Function

In [16]:
def Predict_Next(text, max_sequence_length=17, top_k=5):
    """Predict next-word candidates for ``text`` with the notebook's globals.

    Convenience wrapper around ``predict_next_words`` that supplies the
    module-level ``tokenizer`` and ``model``.

    Args:
        text: Input text to continue.
        max_sequence_length: Length the input is padded to. It should equal
            the value used during training; the default keeps the previously
            hard-coded 17.
        top_k: Number of candidate words to request (previously fixed at 5).

    Returns:
        Whatever ``predict_next_words`` returns for these arguments.
    """
    return predict_next_words(text, tokenizer, model, max_sequence_length, top_k=top_k)

### Testing Loop

In [18]:
# Interactive loop: read a line, then terminate, reject it, or predict.
while True:
    try:
        text = input("Enter some text for next word prediction or press 5 to terminate: ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the cell was interrupted: leave the loop cleanly
        # instead of ending with a traceback.
        print("Terminating the program.")
        break

    if not text:
        print("Empty input detected. Please enter some text.")
        continue

    # Purely numeric input is never sent to the model.
    try:
        number = int(text)
    except ValueError:
        number = None

    # Termination Condition
    # Any spelling of the number 5 ("5", "05", "+5") terminates. Previously
    # only the exact string "5" did, and the other spellings were silently
    # ignored with no message at all.
    if number == 5:
        print("Terminating the program.")
        break

    # Invalid Entry Check
    if number is not None:
        print("Invalid entry, please enter some valid inputs.")
        continue

    # Prediction
    predicted_words = Predict_Next(text)
    print(f"These are the predicted next words: {predicted_words}")


Enter some text for next word prediction or press 5 to terminate: I am in
These are the predicted next words: ['ur', 'the', 'their', 'sorrow', 'those']
Enter some text for next word prediction or press 5 to terminate: I am in the
These are the predicted next words: ['language', 'beginning', 'generations', 'cloud', 'chaldees']
Enter some text for next word prediction or press 5 to terminate: I am in the generations
These are the predicted next words: ['of', 'god', 'from', 'day', 'and']
Enter some text for next word prediction or press 5 to terminate: I am in the generations of
These are the predicted next words: ['terah', 'haran', 'japheth', 'renown', 'shinar']
Enter some text for next word prediction or press 5 to terminate: I am in the generations of japheth
These are the predicted next words: ['from', 'after', 'took', 'the', 'two']
Enter some text for next word prediction or press 5 to terminate: I am in the generations of japheth after
These are the predicted next words: ['their', '