In [1]:
import random
import re
from collections import defaultdict

In [2]:
# Demo corpus: a short fairy-tale passage that is tokenized below and used to
# build the second-order Markov chain.
sample_text = """
Long ago in a kingdom of golden towers there lived a young gladiator with a kind heart.
Though trained to fight in the grand arena the gladiator dreamed of peace and freedom.
Every battle was watched by nobles villagers and wandering bards who sang of courage.
Unlike other warriors he fought not for glory but to protect the innocent and weak.

One evening a wounded fairy appeared beneath the arena sands glowing with silver light.
The fairy revealed that a dark curse had fallen upon the kingdom and the king was unaware.
Only a warrior bound by honor and compassion could break the ancient spell.
The gladiator swore an oath to help the fairy even if it cost him his life.

Guided by moonlight and magic the gladiator journeyed beyond the city walls.
He crossed enchanted forests silent ruins and mountains guarded by stone giants.
In the final battle he faced a shadow beast born of fear and forgotten promises.
With courage wisdom and the fairy blessing the gladiator shattered the curse.

The arena was never the same again and neither was the kingdom.
The gladiator was freed and remembered not as a fighter but as a hero of legend.
Fairies still whisper his story to children beneath the stars.
"""

def tokenize(text):
    """Lowercase ``text`` and return its word tokens in order of appearance.

    A token is a maximal run of word characters (letters, digits, underscore).
    Punctuation and whitespace act as separators and are discarded, so an
    apostrophe splits a word in two.
    """
    lowered = text.lower()
    word_pattern = re.compile(r'\b\w+\b')
    return word_pattern.findall(lowered)

# Tokenize the corpus once; the resulting word list is what the chain is built from.
words = tokenize(sample_text)
# Print the token count as a quick sanity check on tokenization.
print(f"Total words in corpus: {len(words)}")


def build_second_order_markov(words):
    """Build a second-order transition table from a sequence of words.

    Args:
        words: Sliceable sequence of tokens (e.g. a list of strings).

    Returns:
        A ``defaultdict(list)`` mapping each consecutive pair
        ``(words[i], words[i + 1])`` to the list of words observed immediately
        after that pair, in corpus order and with duplicates kept. Inputs with
        fewer than three words yield an empty table.
    """
    transitions = defaultdict(list)
    # Slide a three-word window: the first two words form the state,
    # the third is recorded as one of its successors.
    for first, second, follower in zip(words, words[1:], words[2:]):
        transitions[(first, second)].append(follower)
    return transitions

# Build the transition table from the tokenized corpus.
markov_chain = build_second_order_markov(words)


# Preview the first five states (dicts iterate in insertion order) with their successors.
print("\nSample Markov Chain entries:")
for key in list(markov_chain)[:5]:
    print(f"{key} -> {markov_chain[key]}")


def generate_text(chain, length=50, seed=None, rng=None):
    """Generate text by walking a second-order Markov chain.

    Args:
        chain: Mapping from a ``(word, word)`` pair to the list of words that
            may follow it.
        length: Maximum number of words in the result. The result can be
            shorter if the walk reaches a pair with no recorded successor.
            Values below 2 truncate the seed pair; values <= 0 give ``""``.
        seed: Optional ``(word, word)`` pair to start from. When omitted, a
            pair is picked at random from the chain's keys.
        rng: Optional ``random.Random`` instance used for every random choice.
            Pass a seeded instance for reproducible output. Defaults to the
            module-level ``random`` generator.

    Returns:
        The generated words joined by single spaces. Returns ``""`` when no
        seed is given and ``chain`` is empty.

    Raises:
        ValueError / TypeError: If ``seed`` cannot be unpacked into two words.
    """
    chooser = rng if rng is not None else random

    if seed is None:
        # Nothing to start from: avoid random.choice raising on an empty list.
        if not chain:
            return ""
        seed = chooser.choice(list(chain.keys()))

    w1, w2 = seed
    generated_words = [w1, w2]

    for _ in range(length - 2):
        key = (generated_words[-2], generated_words[-1])
        # .get() rather than [] so a defaultdict chain is not polluted with
        # empty entries for unseen pairs.
        next_words = chain.get(key)

        if not next_words:
            # Dead end: this pair never had a successor in the corpus.
            break

        generated_words.append(chooser.choice(next_words))

    # The seed alone contributes two words; cap so length < 2 is honored.
    return " ".join(generated_words[:max(length, 0)])


if __name__ == "__main__":
    # Seed the global RNG so that Restart & Run All reproduces the same
    # passage; change or remove the seed to sample a different one.
    random.seed(42)
    print("\nGenerated Text:\n")
    text = generate_text(markov_chain, length=40)
    print(text)

Total words in corpus: 210

Sample Markov Chain entries:
('long', 'ago') -> ['in']
('ago', 'in') -> ['a']
('in', 'a') -> ['kingdom']
('a', 'kingdom') -> ['of']
('kingdom', 'of') -> ['golden']

Generated Text:

the arena sands glowing with silver light the fairy revealed that a dark curse had fallen upon the kingdom and the fairy blessing the gladiator dreamed of peace and freedom every battle was watched by nobles villagers and wandering bards
