In [88]:
import string, random
import pprint

In [93]:
class MarkovChain():
    def __init__(self, keep_formatting=True):
        self.model = {}
        self.keep_formatting = keep_formatting
    
    def _add_to_model(self, prev, word):
        if prev not in self.model:
            self.model[prev] = {}
        if word in self.model[prev]:
            self.model[prev][word] += 1
        else:
            self.model[prev][word] = 1
    
    def train(self, data):
        for sentence in data:
            if not self.keep_formatting:
                s_lower = sentence.lower()
                s_nopunc = s_lower.translate(str.maketrans('', '', string.punctuation))
                word_arr = s_nopunc.split()
            else:
                word_arr = sentence.split()
            prev = '/start'
            for word in word_arr:
                self._add_to_model(prev, word)
                prev = word
            self._add_to_model(prev, '/end')
            
    def generate(self, k=10):
        output_data = []
        for i in range(k):
            word_arr = []
            word = '/start'
            while True:
                choices = []
                weight = []
                for k, v in self.model[word].items():
                    choices.append(k)
                    weight.append(v)
                word = random.choices(choices, weights=weight)[0]
                if word != '/end':
                    word_arr.append(word)
                else:
                    break
            sentence= ' '.join(word_arr)
            output_data.append(sentence)
        return output_data


In [95]:
pp = pprint.PrettyPrinter(indent=4)
mc = MarkovChain()
data = ["The cat is big",
        "The cat, is small.",
        "The dog is big.",
        "Cat is really big"]
mc.train(data)
pp.pprint(mc.model)
new_data = mc.generate(k=20)
[print(x) for x in new_data]


{   '/start': {'Cat': 1, 'The': 3},
    'Cat': {'is': 1},
    'The': {'cat': 1, 'cat,': 1, 'dog': 1},
    'big': {'/end': 2},
    'big.': {'/end': 1},
    'cat': {'is': 1},
    'cat,': {'is': 1},
    'dog': {'is': 1},
    'is': {'big': 1, 'big.': 1, 'really': 1, 'small.': 1},
    'really': {'big': 1},
    'small.': {'/end': 1}}
Cat is big.
Cat is big.
The cat, is big
The dog is big.
The dog is big.
The cat is small.
The dog is big.
The dog is really big
Cat is big
The dog is big
The dog is small.
The dog is really big
Cat is big.
The cat, is small.
The cat is really big
The dog is big
Cat is big.
The cat is small.
The cat is small.
The dog is big.


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]