In [1]:
import re
import random
from collections import defaultdict, Counter

In [2]:
class NGramAutoComplete:
    """Word-level N-gram model for next-word prediction and greedy completion.

    The model maps every context of ``n - 1`` consecutive tokens found in the
    corpus to a ``Counter`` of the tokens that followed that context.
    """

    def __init__(self, corpus, n=3):
        """Initializes the N-gram model for auto-completion.

        Args:
            corpus: Training text (a single string).
            n: Order of the model; each prediction is conditioned on the
                previous ``n - 1`` tokens. Must be at least 1 (``n=1`` is a
                unigram model that ignores the context).

        Raises:
            ValueError: If ``n`` is smaller than 1.
        """
        if n < 1:
            raise ValueError(f"n must be >= 1, got {n!r}")
        self.n = n
        # context tuple of (n - 1) tokens -> Counter of following tokens
        self.ngrams = defaultdict(Counter)
        self.build_model(corpus)

    def preprocess_text(self, text):
        """Lowercases ``text``, strips punctuation and splits it into tokens.

        Every character other than ASCII letters, digits and whitespace is
        removed (so accented / non-ASCII letters are dropped as well).

        Returns:
            list[str]: Whitespace-separated tokens, possibly empty.
        """
        text = re.sub(r'[^a-zA-Z0-9\s]', '', text.lower())
        return text.split()

    def build_model(self, corpus):
        """Adds the N-gram counts of ``corpus`` to the frequency table.

        Counts are accumulated, so calling this again with more text extends
        the existing model. A corpus shorter than ``n`` tokens adds nothing.
        """
        tokens = self.preprocess_text(corpus)
        context_size = self.n - 1
        for start in range(len(tokens) - self.n + 1):
            split = start + context_size
            context = tuple(tokens[start:split])
            self.ngrams[context][tokens[split]] += 1

    def _context_key(self, tokens):
        """Returns the lookup key: the last ``n - 1`` tokens as a tuple."""
        context_size = self.n - 1
        if context_size == 0:
            # tokens[-0:] would be the WHOLE list; a unigram model is keyed
            # by the empty context, exactly as build_model stores it.
            return ()
        return tuple(tokens[-context_size:])

    def _predict_from_tokens(self, tokens):
        """Returns the most frequent follower of ``tokens`` or ``None``."""
        # .get() keeps the defaultdict from creating entries for unseen keys.
        followers = self.ngrams.get(self._context_key(tokens))
        if not followers:
            return None
        return followers.most_common(1)[0][0]

    def predict_next_word(self, context):
        """Predicts the most probable next word based on the given context.

        Only the last ``n - 1`` tokens of ``context`` are used. Ties are
        resolved in favour of the follower seen first in the corpus.

        Returns:
            str | None: The predicted token, or ``None`` when the context was
            never observed (this includes contexts shorter than ``n - 1``
            tokens).
        """
        return self._predict_from_tokens(self.preprocess_text(context))

    def generate_text(self, seed_text, max_words=10):
        """Generates text by predicting words iteratively.

        Starting from the preprocessed ``seed_text``, appends the most likely
        next word up to ``max_words`` times and stops early as soon as the
        current context is unknown to the model.

        Returns:
            str: The (lowercased, punctuation-free) seed followed by the
            generated words, joined by single spaces.
        """
        words = self.preprocess_text(seed_text)
        for _ in range(max_words):
            # Tokens are already normalized, so look them up directly instead
            # of re-joining and re-tokenizing the whole text on every step.
            next_word = self._predict_from_tokens(words)
            if next_word is None:
                break
            words.append(next_word)
        return ' '.join(words)


In [3]:
# Tiny two-sentence corpus used to build the demo model. Both sentences start
# with "The quick brown", so that context is followed by two different words.
dummy_corpus = "The quick brown fox jumps over the lazy dog. The quick brown cat sleeps under the big tree."

In [4]:
# Build a trigram model (n=3): each next word is conditioned on the two
# preceding tokens of the corpus.
auto_complete_model = NGramAutoComplete(dummy_corpus, n=3)

In [5]:
# Single-step prediction. The context is lowercased and tokenized by the model,
# and only its last n-1 = 2 tokens ("quick brown") are used for the lookup.
context = "The quick brown"
predicted_word = auto_complete_model.predict_next_word(context)
print(f"Predicted next word: {predicted_word}")

Predicted next word: fox


In [6]:
# Greedy generation: append up to max_words predicted words to the seed,
# stopping early if the current context was never seen in the corpus.
# The seed is preprocessed by the model, so the result comes back lowercased.
generated_text = auto_complete_model.generate_text("The quick brown", max_words=5)
print(f"Generated text: {generated_text}")

Generated text: the quick brown fox jumps over the lazy
