# Markov Chain
- Probabilistic model for text/natural language generation
- Simple and effective way of generating new text
            - Text
            - Lyrics
            - Story/Novel
            - Code

In [None]:
# Predict the next char
text = "the man was ....they...then....the ...the ... "

# X is a context of k = 3 consecutive characters and Y is the predicted next, i.e. (k+1)-th, character
"""
X     Y      Freq
the   " "    4
the   "n"    2
the   "y"    1
the   "i"    1
man   " "    1

"""

In [12]:
def generateTabel(data,k=4):
    """Build a frequency table of k-character contexts and the character that follows.

    Slides a window of length ``k`` over ``data``; for every window that has a
    following element, increments the count for that (context, next) pair.

    Args:
        data: Sequence to scan (a string in this notebook).
        k: Length of the context window.

    Returns:
        A dict mapping each context to a dict of ``{next_char: count}``.
        Empty when ``data`` has no window with a following element.
    """
    table = {}
    for start in range(len(data) - k):
        context = data[start:start + k]
        following = data[start + k]
        # Create the inner counter on first sight of this context.
        counts = table.setdefault(context, {})
        counts[following] = counts.get(following, 0) + 1
    return table

In [36]:
T = generateTabel("hello hello helli")
print(T)

{'hell': {'o': 2, 'i': 1}, 'ello': {' ': 2}, 'llo ': {'h': 2}, 'lo h': {'e': 2}, 'o he': {'l': 2}, ' hel': {'l': 2}}


In [37]:
def convertFreqIntoProb(T):
    """Normalise each context's counts into probabilities, in place.

    For every inner dict of ``T``, each value is divided by the sum of that
    inner dict's values.

    Args:
        T: Dict mapping context -> ``{next_char: count}``.

    Returns:
        The same ``T`` object, now holding ``{next_char: probability}``.
    """
    for counts in T.values():
        total = float(sum(counts.values()))
        # Only existing keys are reassigned, so iterating items() is safe.
        for char, freq in counts.items():
            counts[char] = freq / total

    return T

In [38]:
T = convertFreqIntoProb(T)
print(T)

{'hell': {'o': 0.6666666666666666, 'i': 0.3333333333333333}, 'ello': {' ': 1.0}, 'llo ': {'h': 1.0}, 'lo h': {'e': 1.0}, 'o he': {'l': 1.0}, ' hel': {'l': 1.0}}


In [43]:
# Working on a large text, i.e. PM Modi's Independence Day speech

text_path = "./speeches/pm_modi_speech.txt"

def load_text(filename):
    """Read the UTF-8 text file at ``filename`` and return its contents lower-cased."""
    with open(filename, encoding='utf8') as handle:
        contents = handle.read()
    return contents.lower()
    
text = load_text(text_path)

In [45]:
print(text[:1000])

my dear countrymen, i convey my best wishes to all of you on this auspicious occasion of independence day. today, the country is brimming with self-confidence. the country is scaling new heights by working extremely hard, with a resolve to realize its dreams. today’s dawn has brought a new spirit, a new enthusiasm, a new zeal and a new energy with it.

my dear countrymen, in our country, there is a neelakurinji flower which blooms once every 12 years. this year, neelakurinji is in full bloom on the hills of southern nilgiri like the ashok chakra (the wheel of ashoka) in the tricolour on our independence day.

my dear countrymen, we are celebrating this festival of independence at a time when our daughters from the states of uttarakhand, himachal pradesh, manipur, telangana and andhra pradesh, have come back after circumnavigating the seven seas. they have returned after unfurling the tricolour in the seven seas, colouring their waters with the hues of our tricolor.

my dear countrymen,

### Train our Markov Chain

In [48]:
def trainMarkovChain(text,k=4):
    """Train a character-level Markov chain on ``text``.

    Counts which character follows each k-character context
    (``generateTabel``) and then normalises those counts into
    probabilities (``convertFreqIntoProb``).

    Args:
        text: Training text.
        k: Context length, passed through to ``generateTabel``.

    Returns:
        Dict mapping context -> ``{next_char: probability}``.
    """
    return convertFreqIntoProb(generateTabel(text, k))

In [52]:
model = trainMarkovChain(text)
print(model)

{'my d': {'e': 0.96875, 'i': 0.03125}, 'y de': {'a': 0.9117647058823529, 'd': 0.029411764705882353, 'v': 0.029411764705882353, 'c': 0.029411764705882353}, ' dea': {'r': 0.8888888888888888, 't': 0.08333333333333333, 'l': 0.027777777777777776}, 'dear': {' ': 1.0}, 'ear ': {'c': 0.5208333333333334, 'a': 0.10416666666666667, 'o': 0.0625, 'w': 0.020833333333333332, 'b': 0.25, 't': 0.020833333333333332, 'v': 0.020833333333333332}, 'ar c': {'o': 1.0}, 'r co': {'u': 0.88, 'n': 0.06, 'r': 0.02, 'o': 0.02, 'm': 0.02}, ' cou': {'n': 0.9322033898305084, 'p': 0.00847457627118644, 'l': 0.025423728813559324, 'r': 0.03389830508474576}, 'coun': {'t': 0.990990990990991, 'c': 0.009009009009009009}, 'ount': {'r': 0.9473684210526315, ' ': 0.03508771929824561, 'e': 0.008771929824561403, '.': 0.008771929824561403}, 'untr': {'y': 0.9814814814814815, 'i': 0.018518518518518517}, 'ntry': {'m': 0.29245283018867924, ' ': 0.49056603773584906, ',': 0.02830188679245283, '.': 0.12264150943396226, '’': 0.05660377358490

### Generate Text at Test Time!

## Sampling!!

In [53]:
import numpy as np

In [58]:
## Understanding sampling via example
fruits = ['apple','banana','mango']
# One probability per fruit, summing to 1. Use real floats rather than
# numeric strings so the distribution does not depend on implicit
# string-to-float conversion inside numpy.
prob = [0.8, 0.1, 0.1]

for i in range(10):
    ## sampling according to a probability distribution
    print(np.random.choice(fruits,p=prob))

apple
apple
apple
banana
apple
apple
apple
mango
apple
apple


In [64]:
def sample_next(ctx,T,k):
    """Sample the character that follows the last ``k`` characters of ``ctx``.

    Args:
        ctx: Text generated so far; only its last ``k`` characters are used.
        T: Dict mapping context -> ``{next_char: probability}``.
        k: Context length used to slice ``ctx``.

    Returns:
        A single space when the context is not a key of ``T``; otherwise one
        of the context's next characters, drawn with ``np.random.choice``
        using the stored probabilities.
    """
    window = ctx[-k:]
    distribution = T.get(window)
    if distribution is None:
        # Unseen context: fall back to a space.
        return " "

    candidates = list(distribution)
    weights = list(distribution.values())
    return np.random.choice(candidates, p=weights)

In [77]:
sample_next("comm",model,4)

'e'

In [78]:
def generateText(starting_sent,k=4,maxLen=1000,T=None):
    """Generate text by repeatedly sampling the next character from a Markov chain.

    Args:
        starting_sent: Seed text; the output begins with it.
        k: Context length. It should match the ``k`` the table was trained
            with, and is expected to be a positive integer.
        maxLen: Number of characters to append to ``starting_sent``.
        T: Optional transition table (context -> ``{next_char: probability}``).
            When omitted, the notebook-level ``model`` is used, so existing
            calls such as ``generateText("dear", k=4, maxLen=200)`` behave
            as before.

    Returns:
        ``starting_sent`` followed by ``maxLen`` sampled characters, as a
        plain ``str``.
    """
    # Allow an explicit table instead of always depending on the global.
    table = model if T is None else T

    pieces = [starting_sent]
    ctx = starting_sent[-k:]

    for _ in range(maxLen):
        # sample_next may return a numpy string; normalise to plain str.
        next_prediction = str(sample_next(ctx, table, k))
        pieces.append(next_prediction)
        # Only the trailing k characters matter for the next lookup.
        ctx = (ctx + next_prediction)[-k:]

    return "".join(pieces)

In [94]:
generateText("dear",k=4,maxLen=200)

'dear brothers, i want the dedicated respected to construction is supreme food grainful, and the started.\n\ntoday and sisters and to constitution. the rich she including, take three lakh eneration cabine an'

In [None]:
# We can generate code, speeches, novels, news - anything - using Markov chains.