In [1]:
import numpy as np
import nltk
import os

In [2]:
nltk.download('genesis')

[nltk_data] Downloading package genesis to
[nltk_data]     C:\Users\ADMIN\AppData\Roaming\nltk_data...
[nltk_data]   Package genesis is already up-to-date!


True

## Creating a Transition Table

In [3]:
def generateTable(data,k=4):
    T={}
    for i in range(len(data)-k):
        x=data[i:i+k]
        y=data[i+k]
        
        if T.get(x) is None:
            T[x]={}
            T[x][y]=1
        else:
            if T[x].get(y) is None:
                T[x][y]=1
            else:
                T[x][y]+=1
    return T

In [4]:
T=generateTable("hello hello helli")

In [5]:
print(T)

{'hell': {'o': 2, 'i': 1}, 'ello': {' ': 2}, 'llo ': {'h': 2}, 'lo h': {'e': 2}, 'o he': {'l': 2}, ' hel': {'l': 2}}


In [6]:
def convertFrequencyIntoProb(Transition_Table):
    for kx in Transition_Table.keys():
        s=float(sum(Transition_Table[kx].values()))
        for i in Transition_Table[kx].keys():
            Transition_Table[kx][i]=Transition_Table[kx][i]/s
    return Transition_Table

In [7]:
T=convertFrequencyIntoProb(T)

In [8]:
print(T)

{'hell': {'o': 0.6666666666666666, 'i': 0.3333333333333333}, 'ello': {' ': 1.0}, 'llo ': {'h': 1.0}, 'lo h': {'e': 1.0}, 'o he': {'l': 1.0}, ' hel': {'l': 1.0}}


### Sampling

In [9]:
## example of sampling
fruits=["apple","mango","banana"]
#for i in range(9):
   # print(np.random.choice(fruits))

## sampling is done according to probability of each item if not given it gives equal probability to each item.

probs=[0.7,0.2,0.1]
for i in range(9):
    print(np.random.choice(fruits,p=probs))

apple
banana
apple
banana
apple
apple
apple
apple
mango


## Generating Text For Model

In [10]:
from bs4 import BeautifulSoup

In [11]:
with open("Apna Time Aayega.txt") as f:
    bs=BeautifulSoup(f.read(),'html.parser')
    data=bs.get_text()

In [12]:
data

'Apna Time Aayega\nUth ja apni raakh se\nTu udd ja ab talaash mein\nParwaaz dekh parwane ki\nAasman bhi sar uthayega\nAayega apna time aayega\nMere jaisa shaana lala\nTujhe na mil payega\n \nYeh shabdon ka jwala\nMeri bediyan pighlayega\nJitna toone boya hai tu\nUtna hi to khaayega\nAisa mera khwaab hai\nJo darr ko bhi satayega\nZinda mera khwaab\nAb kaise tu dafnayega\n \nAb hausle se jeene de\nAb khauf nahi hai seene mein\nHar raaste ko cheerenge\nHum kaamyabi chheenenge\nSab kuchh mila paseene se\nMatlab bana ab jeene mein\n \nKyon\nKyon ki apna time aayega\n \nTu nanga hi to aaya hai\nKya ghanta lekar jaayega\nApna time aayega\nApna time aayega\nApna time aayega x (2)\n \nTu nanga hi to aaya hai\nKya ghanta lekar..\n \nKissi ka hath nahi tha sar par\nYahaan par aaya khud ki mehnat se main\nJitni taakat kismat mein nahi\nUtni rehmat mein hai\nPhir bhi ladka sehmat nahin hai\nKyon ki hairat nahi hai\nZaroorat yahaan marzi ki\nAur jurrat ki hai\n \nTaakat ki hai, aafat ki\nHimaakat ki

### Train Our Markov Chain

In [13]:
def trainmarkovChain(text,k=4):
    T=generateTable(text,k)
    T=convertFrequencyIntoProb(T)
    return T

In [14]:
def sample_text(ctx,T,k):
    ctx=ctx[-k:]
    
    if T.get(ctx) is None:
        return " "
    possible_chars=list(T[ctx].keys())
    possible_values=list(T[ctx].values())
    
    return np.random.choice(possible_chars,p=possible_values)

In [15]:
Trained=trainmarkovChain(data)

In [16]:
def generateText(starting,T,k,maxlen=2000):
    np.random.seed(11)
    sent=starting
    ctx=starting[-k:]
    
    for ix in range(maxlen):
        next_pred=sample_text(ctx,T,k)
        sent += next_pred
        ctx=sent[-k:]
    return sent

In [17]:
predictions=generateText("apna",Trained,4)

In [21]:
print(predictions)

apna Time Aayega
Jitna time aayega
Jitni taakat ki
Jeetne ki
Mohobbat ki
Jeetne ki
Mohobbat ki
Mohobbat ki, ibaadat ki
Mohobbat ki harkat di harkat nahi tha sar uthayega x (2)
 
Taakat di hai, aafat ki ab aadat ki apni rehmat mein
 
Kyon
Kyon?
 
Tu nanga hi to aaya hath nahi hairat ki, ibaadat ki ab talaash mein
 
Kyon?
 
Tu nanga hi to aaya khwaab
Ab khaayega
Apna time aayega
Apna time aayega apna time aaya khud ki ab talaash mein nahi hai
 
Iss hardâ€™ch nai hai
 
Iss harkat ki
Mohobbat ki
Mohobbat ki
Jeetne ki
Aur jurrat ki, amaana lala
Tujhe na mil payega
Jitni raaste ko bhi ladka sehmat mein nahin hai
Kyon?
 
Taakat ki
Aasman bhi ladka sehmat mein nahi
Utna hi to khaayega
 
Yeh shabdon ka hai
Zaroorat ki
Himaakat di hairat ki, ibaadat ki hai
Kya ghanta lekar jaayega
Apna time aayega
 
Tu nanga hi barkat ki
Adalat yahaan par
Yahaan marzi ki
Jeetne ki
Jeetne ki mehnat se main
Jitna hi to aayega
 
Ab hai
Kya tu dafnayega
Mere jaisa mera khud ki hai
Kya ghanta lekar jaayega
Kya tu gha

In [20]:
with open("predictions.txt",mode='w',newline='\n') as f:
    f.write(predictions)