### Markov Chains
### Solution

In [91]:
import numpy as np

### Data Preparation

In [194]:
def load_text(filename):
    #loads a text file, and returns text
    with open(filename) as f:
        return f.read()

text1 = load_text('./Data_Rap/lyrics1.txt')
text2 = load_text('./Data_Rap/lyrics2.txt')

In [195]:
def combine_text(*docs):
    """combines multiple text files into single data string"""
    data = ""
    for doc in docs:
        data += ' '.join([word.lower() for word in doc.split('\n')])
    return data

In [196]:
data = prepare_data(text1,text2)

### Markov Chain 

In [95]:
def generateTransitionTable(data,order=4):
    """Creates a Transition Table, which stores the ctx-nextletter frequency, in nested dictionary"""
    T = {}
    for ix in range(len(data)-order):
        #get the current 'order' sized window to create ctx-nextletter pair in Transtion Table
        ctx = data[ix:ix+order]
        
        #get the next letter in seqence
        next_letter = data[ix+order]

        if T.get(ctx) is None:
            #if the ctx doesn't exist in transition table, then create one.
            T[ctx] = {}
            T[ctx][next_letter] = 1
        else:
            # if transition pair ctx-next_letter doesn't exist, create one otherwise update the old frequency by1
            if T[ctx].get(next_letter) is None:
                T[ctx][next_letter] = 1
            else:
                T[ctx][next_letter] += 1
                
        # Convert Numbers into Probabilities(scale everything between 0 to 1)
                
    return T

In [96]:
def convertFreqIntoProb(T):
    """Converts Frequencies into Probabilities"""
    for kx in T.keys():
        s = float(sum(T[kx].values()))
        for k in T[kx].keys():
            T[kx][k] = T[kx][k]/s    
            
    return T

In [198]:
def trainMarkovChain(data):
    T = generateTransitionTable(data)
    T = convertFreqIntoProb(T)
    return T

T = trainMarkovChain(data)

### Sampling at Test Time

In [199]:
def sample_next(ctx,T):
    """Samples a new character based on past context, and prob distribution defined in T[ctx]"""
    possible = T.get(ctx)
    if possible is None:
        return " "
    possible_letters = list(possible.keys())
    #Sampling according to the probability distribution
    keys_probs = [possible[kx] for kx in possible_letters]
    return np.random.choice(possible_letters,p=keys_probs)


### Generate Text

In [224]:
def generateText(ctx,maxLen=500):
    np.random.seed(1)
    sentence = '' + ctx
    order = 4
    for ix in range(maxLen):
        next_letter = sample_next(ctx,T)
        sentence += next_letter
        ctx = sentence[-order:]
    
    return sentence

### Test your results 

In [225]:
sentence = generateText("sing")

In [226]:
print(sentence)

sing but i'm realized it'll be indecent i'm gun from the fuckin' when think i'm everythinks theâ€”)  i still with) this chokehold on thing in you and i knock nod to snap any verse as rappidy ramen how can't gonna know what if he looked like it's not then paint to one homage, pupil in floating me like where just what i scream well, to go easy one chasin' and pay hone it with it lace long off it (six mind that i do where wack it occurs too 'cause that i probably ready to you off! screw the ball high w
