In [8]:
import random
import re
from collections import defaultdict

class MarkovChainTextGenerator:
    """
    Text generator backed by an n-gram Markov chain.

    The model maps each state (a tuple of ``n - 1`` consecutive tokens) to
    the list of tokens seen immediately after that state in the training
    text. Successors are stored with repetition, so a uniform random pick
    from a list reproduces the observed frequencies.
    """

    def __init__(self, mode="word", n=2):
        """
        :param mode: "word" or "char" (type of Markov chain)
        :param n: size of n-grams (default=2 -> bigram); must be >= 1
        :raises ValueError: if n is smaller than 1
        """
        if n < 1:
            raise ValueError("n must be >= 1")
        self.mode = mode
        self.n = n
        self.model = defaultdict(list)

    def _tokenize(self, text):
        """
        Split text into the token sequence used by the current mode.
        :param text: input string
        :return: list of lowercase words ("word") or characters ("char")
        :raises ValueError: if the mode is neither "word" nor "char"
        """
        if self.mode == "word":
            return re.findall(r"\b\w+\b", text.lower())
        if self.mode == "char":
            return list(text)
        raise ValueError("Mode must be 'word' or 'char'")

    def train(self, text):
        """
        Train the Markov chain model on input text.

        May be called repeatedly; transitions accumulate in the model.
        :param text: input string
        :raises ValueError: if the mode is neither "word" nor "char"
        """
        sequence = self._tokenize(text)
        order = self.n - 1  # number of tokens that make up a state

        # Every position with a full state in front of it yields one
        # transition. Stopping at len - order (not len - n) makes sure the
        # transition into the very last token is recorded as well.
        for i in range(len(sequence) - order):
            key = tuple(sequence[i:i + order])
            self.model[key].append(sequence[i + order])

    def generate(self, length=50, seed=None):
        """
        Generate new text using the trained model.
        :param length: number of tokens (words/chars) in output
        :param seed: starting token(s); a string is tokenized the same way
                     as the training text, any other iterable is used as a
                     ready-made token sequence, None picks a random state
        :return: generated tokens joined by spaces ("word" mode) or
                 directly concatenated (any other mode)
        :raises ValueError: if the model has not been trained
        """
        if not self.model:
            raise ValueError("Model is empty. Train it first!")

        if seed is None:
            seed = random.choice(list(self.model.keys()))
        elif isinstance(seed, str):
            # Tokenize instead of wrapping the whole string as one token,
            # so a multi-word / multi-char seed can match a model state.
            seed = tuple(self._tokenize(seed))

        output = list(seed)
        order = self.n - 1
        # The model is not modified while generating, so the fallback pool
        # can be built once instead of on every unknown state.
        successor_lists = list(self.model.values())

        for _ in range(length - len(output)):
            # output[-0:] would be the whole list, so the order-0 chain
            # (n == 1) needs the empty state spelled out explicitly.
            state = tuple(output[-order:]) if order else ()
            # .get() avoids inserting empty entries into the defaultdict.
            followers = self.model.get(state)
            if not followers:
                # Unknown state (dead end or short seed): restart from the
                # successors of a randomly chosen known state.
                followers = random.choice(successor_lists)
            output.append(random.choice(followers))

        # A seed longer than the requested length must not overshoot it.
        if len(output) > length:
            output = output[:max(length, 0)]

        # Convert back to string
        if self.mode == "word":
            return " ".join(output)
        return "".join(output)


# ---------------- Example Usage ----------------
if __name__ == "__main__":
    # Small demo corpus; the leading newline and the four-space indentation
    # are part of the text and therefore part of the character-level model.
    sample_text = (
        "\n"
        "    Artificial Intelligence is the future of technology.\n"
        "    Machine learning and deep learning are subsets of AI.\n"
        "    Markov chains are used for text generation in natural language processing.\n"
        "    "
    )

    # Word-level demo: bigram chain, 30 generated words.
    print("=== Word-level Markov Chain ===")
    word_gen = MarkovChainTextGenerator(n=2, mode="word")
    word_gen.train(sample_text)
    word_sample = word_gen.generate(length=30)
    print(word_sample)

    # Character-level demo: trigram chain, 200 generated characters.
    print("\n=== Character-level Markov Chain ===")
    char_gen = MarkovChainTextGenerator(n=3, mode="char")
    char_gen.train(sample_text)
    char_sample = char_gen.generate(length=200)
    print(char_sample)



=== Word-level Markov Chain ===
technology machine learning and deep learning and deep learning and deep learning and deep learning and deep learning are subsets of technology machine learning are used for text generation in

=== Character-level Markov Chain ===
p leare is of AI.
           Mare used deep learnin natificial Intext ge is of AI.
    AI.
       Machins arnine proce used deep learning.
    AI.
      Machaing.
    Marning.
       Marnins the usets
