A simple Markov chain text generator in Python, inspired by the Go codewalk [implementation](https://golang.org/doc/codewalk/markov/).

In [4]:
import random

class MarkovChain:
    """Markov chain text generator keyed on k-token prefixes.

    ``chain`` maps a prefix (``k`` consecutive tokens joined with single
    spaces) to the list of tokens that followed that prefix in the fed
    texts. Repeated followers are stored repeatedly, so ``random.choice``
    picks each follower proportionally to how often it was seen.
    """

    # Sentinel appended to every fed text; drawing it stops generation.
    END = '_END_'

    def __init__(self, k=2, tokenizer=None):
        """Initialize a Markov Chain with k prefix elements (k-gram).

        Args:
            k: Number of tokens that make up a prefix.
            tokenizer: Callable turning a string into a sequence of string
                tokens. Defaults to ``default_tokenizer`` (whitespace split).
        """
        self.k = k
        # Always bind a tokenizer: a caller-supplied one must not be dropped.
        self.tokenizer = self.default_tokenizer if tokenizer is None else tokenizer
        self.chain = {}

    def feed(self, text):
        """Feed the chain with input text.

        Every window of ``k`` tokens is recorded as a prefix together with
        the token that follows it; the last window is followed by ``END``.
        Texts with fewer than ``k`` tokens add nothing.
        """
        # list(...) lets tokenizers return any iterable, not only lists.
        tokens = list(self.tokenizer(text)) + [self.END]

        for start in range(len(tokens) - self.k):
            stop = start + self.k
            prefix = " ".join(tokens[start:stop])
            self.chain.setdefault(prefix, []).append(tokens[stop])

    def generate(self, seed_text, maxlen=100):
        """Generate new text from the given seed.

        Args:
            seed_text: Text that must tokenize to exactly ``k`` tokens.
            maxlen: Maximum number of tokens appended after the seed.

        Returns:
            The seed followed by the generated tokens, formatted with
            ``format_output``. Generation stops early when ``END`` is drawn
            or when the current prefix has no recorded follower.

        Raises:
            AssertionError: If the seed does not tokenize to ``k`` tokens
                (the check is skipped when Python runs with ``-O``).
        """
        seed_tokens = list(self.tokenizer(seed_text))
        assert len(seed_tokens) == self.k, f"Invalid seed length, must be of length {self.k}"

        # seed_tokens is already a fresh list, so it is safe to extend.
        text = seed_tokens

        for i in range(maxlen):
            prefix = " ".join(text[i:i + self.k])
            # Read-only lookup: generating must not add entries to the model.
            choices = self.chain.get(prefix)

            if not choices:
                # Unknown prefix: nothing left to build on.
                break

            next_token = random.choice(choices)
            if next_token == self.END:
                break
            text.append(next_token)

        # Single exit point so every path returns a formatted string.
        return self.format_output(text)

    @staticmethod
    def format_output(tokens):
        """Format the list of tokens for output (joined with single spaces)."""
        return " ".join(tokens)

    @staticmethod
    def default_tokenizer(text):
        """Default tokenizer splitting on runs of whitespace."""
        return text.split()

In [5]:
# Bigram model (prefix of two tokens) using the default whitespace tokenizer.
mc = MarkovChain(k=2)

In [6]:
# Tiny training corpus: several sentences share the "was a" bigram, so the
# chain can branch between their endings.
texts = [
    "Socrates was a man",
    "Plato was a man",
    "Aristotle was a man",
    "He was a man and a thinker",
    "Scooby Doo was a dog",
    "I am not a number! I am a free man!",
]

# Train the model on every sentence.
for text in texts:
    mc.feed(text)

# Draw ten samples from the same two-token seed, one per output line.
print(*(mc.generate("Socrates was") for _ in range(10)), sep="\n")

Socrates was a dog
Socrates was a man and a thinker
Socrates was a man
Socrates was a man and a thinker
Socrates was a dog
Socrates was a man
Socrates was a man
Socrates was a dog
Socrates was a dog
Socrates was a man
