<a href="https://colab.research.google.com/github/maneeshdisodia/DearML/blob/master/markov_chain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Reference: https://sookocheff.com/post/nlp/ngram-modeling-with-markov-chains/

In [0]:
# Small sample corpus used to demonstrate tokenising and bigram extraction.
s = (
    "I am Sam. Sam I am. "
    "I do not like green eggs and ham."
)

In [0]:
# Tokenise on single spaces; punctuation stays attached to its word (e.g. 'Sam.').
tokens = s.split(sep=" ")
  

In [0]:
# Pair every token with its immediate successor: (t0, t1), (t1, t2), ...
bigrams = list(zip(tokens, tokens[1:]))

In [0]:
# Display the list of adjacent-word pairs.
bigrams

[('I', 'am'),
 ('am', 'Sam.'),
 ('Sam.', 'Sam'),
 ('Sam', 'I'),
 ('I', 'am.'),
 ('am.', 'I'),
 ('I', 'do'),
 ('do', 'not'),
 ('not', 'like'),
 ('like', 'green'),
 ('green', 'eggs'),
 ('eggs', 'and'),
 ('and', 'ham.')]

In [0]:
import random

In [0]:
# random.sample returns a *list* of k picks, so k=1 yields a one-element list
# (hence the [0] indexing wherever a single word is needed).
random.sample(['am', 'am.', 'do'], 1)

['am']

In [0]:
class MarkovChain:
    """Bigram (first-order) Markov chain over whitespace-separated words.

    ``memory`` maps every word seen during training to the list of words that
    were observed directly after it. Successors are stored with repetition, so
    a uniform pick from the list follows the observed transition frequencies.
    """

    def __init__(self):
        # word -> list of observed successor words (duplicates kept on purpose)
        self.memory = {}

    def _learn_key(self, key, value):
        """Record that ``value`` was observed immediately after ``key``."""
        self.memory.setdefault(key, []).append(value)

    def learn(self, text):
        """Add every adjacent word pair found in ``text`` to ``memory``.

        Words are split on any run of whitespace, so repeated spaces, tabs and
        newlines never create empty-string states. Text containing fewer than
        two words contributes nothing.
        """
        tokens = text.split()
        for current_word, following_word in zip(tokens, tokens[1:]):
            self._learn_key(current_word, following_word)

    def _next(self, current_state):
        """Return a randomly chosen successor of ``current_state``.

        When the state has no recorded successors (unknown word, or a word
        only ever seen last), a random known state (a key of ``memory``) is
        returned instead.

        Raises:
            ValueError: if nothing has been learned yet.
        """
        candidates = self.memory.get(current_state)

        if not candidates:
            # Materialise the keys: the random module samples from sequences,
            # and a dict view is rejected with TypeError on Python 3.11+.
            candidates = list(self.memory)

        if not candidates:
            raise ValueError("cannot choose a next state: the chain has not learned any text")

        return random.choice(candidates)

    def babble(self, amount, state=''):
        """Generate text: ``state`` followed by ``amount`` generated words.

        Args:
            amount: number of words to generate after ``state``. A falsy or
                negative value returns ``state`` unchanged.
            state: starting word. With the default ``''`` the result begins
                with a space, because the empty start is joined like a word.

        Returns:
            The starting state and the generated words joined by single spaces.

        Raises:
            ValueError: if words are requested before anything was learned.
        """
        if not amount:
            return state

        # Iterative walk instead of recursion so long outputs cannot hit the
        # interpreter's recursion limit.
        words = [state]
        remaining = amount
        while remaining > 0:
            state = self._next(state)
            words.append(state)
            remaining -= 1

        return ' '.join(words)

In [0]:
# Fresh chain: its memory starts out as an empty dict.
m = MarkovChain()

In [0]:
# Three words give two transitions: 'I' -> 'am' and 'am' -> 'Sam.'.
m.learn('I am Sam.')

In [0]:
# Inspect the transition table: each word maps to the list of words seen after it.
m.memory

{'I': ['am'], 'am': ['Sam.']}

In [0]:
# A second sentence with the same prefix appends to the existing successor lists.
m.learn('I am Kevin.')

In [0]:
# 'am' is now stored twice under 'I': repeated successors are kept, not de-duplicated.
m.memory

{'I': ['am', 'am'], 'am': ['Sam.', 'Kevin.']}

In [0]:
# NOTE(review): the recorded output below contains words ('probability', 'bigrams',
# 'babble') that only occur in the paragraph learned in the *following* cell, so it
# appears to come from out-of-order execution. At this point in a top-to-bottom run
# the chain only knows 'I', 'am', 'Sam.' and 'Kevin.', so a fresh run would not
# reproduce it.
m.babble(5,'the')

'the probability of bigrams and babble'

In [0]:
# Train on a longer paragraph so the chain has many more states to walk through.
m.learn('Putting it all together we have a simple Markov Chain that can learn bigrams and babble text given the probability of bigrams that it has learned. Markov Chain’s are a simple way to store and query n-gram probabilities. Full source code for this example follows.')

In [0]:
# Inspect the transition table after all three learn() calls.
m.memory

{'Chain': ['that'],
 'Chain’s': ['are'],
 'Full': ['source'],
 'I': ['am', 'am'],
 'Markov': ['Chain', 'Chain’s'],
 'Putting': ['it'],
 'a': ['simple', 'simple'],
 'all': ['together'],
 'am': ['Sam.', 'Kevin.'],
 'and': ['babble', 'query'],
 'are': ['a'],
 'babble': ['text'],
 'bigrams': ['and', 'that'],
 'can': ['learn'],
 'code': ['for'],
 'example': ['follows.'],
 'for': ['this'],
 'given': ['the'],
 'has': ['learned.'],
 'have': ['a'],
 'it': ['all', 'has'],
 'learn': ['bigrams'],
 'learned.': ['Markov'],
 'n-gram': ['probabilities.'],
 'of': ['bigrams'],
 'probabilities.': ['Full'],
 'probability': ['of'],
 'query': ['n-gram'],
 'simple': ['Markov', 'way'],
 'source': ['code'],
 'store': ['and'],
 'text': ['given'],
 'that': ['can', 'it'],
 'the': ['probability'],
 'this': ['example'],
 'to': ['store'],
 'together': ['we'],
 'way': ['to'],
 'we': ['have']}