In [1]:
import nltk
import string
from HMM import supervised_HMM, unsupervised_HMM, HiddenMarkovModel
import re

In [2]:
def load_poems(filename):
    """Read a poem file and return its cleaned lines plus word frequencies.

    Every line is stripped of surrounding whitespace. Lines shorter than 10
    characters after stripping are treated as too short to be a valid line
    and are skipped. Each remaining line has all `string.punctuation`
    characters removed, is lower-cased, and is split on whitespace.

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    lines : list of list of str
        One list of cleaned words per kept line, in file order.
    words : dict
        Maps each cleaned word to the number of times it occurs.
    """
    lines = []  # list of lists: one cleaned, split line per entry
    words = {}  # word -> frequency

    # The context manager closes the file even if reading raises.
    with open(filename, 'r') as file:
        for line in file:
            line = line.strip()
            if len(line) < 10:
                # Too short to be a valid line
                continue
            line = "".join(ch for ch in line if ch not in string.punctuation)
            tokens = line.lower().split()

            lines.append(tokens)

            for word in tokens:
                words[word] = words.get(word, 0) + 1
    return lines, words

In [3]:
# Relative path to the poem corpus; resolved against the notebook's working directory.
file = "data/shakespeare.txt"
# lines: one list of cleaned words per kept line; words: word -> frequency.
lines, words = load_poems(file)

In [4]:
# Display the first parsed line as a quick sanity check of load_poems.
lines[0]

['from', 'fairest', 'creatures', 'we', 'desire', 'increase']

In [5]:
def unsupervised_learning(lines, n_states, n_iters):
    '''
    Encode `lines` as integer sequences and train an HMM on them.

    lines: List of word lists, encoded with parse_observations.
    n_states: Number of hidden states our HMM should have.
    n_iters: Number of iterations we should go through.

    Returns a tuple (leHMM, obs, obs_map): the model returned by
    unsupervised_HMM, the encoded sequences it was trained on, and the
    word -> id map used for the encoding.
    '''
    # Encode words as integer ids, then train the HMM on the encoded lines.
    obs, obs_map = parse_observations(lines)
    leHMM = unsupervised_HMM(obs, n_states, n_iters)
    return leHMM, obs, obs_map
    

In [6]:
def obs_map_reverser(obs_map):
    """Return the inverse of `obs_map`, i.e. a dict mapping value -> key.

    If several keys share the same value, the key visited last in
    iteration order is the one kept.
    """
    return {code: token for token, code in obs_map.items()}

In [7]:
def parse_observations(lines):
    """Encode lines of words as lines of integer ids.

    Each word has every non-word character (regex `[^\\w]`) removed and is
    lower-cased. Distinct cleaned words are numbered 0, 1, 2, ... in order
    of first appearance.

    Returns a tuple (obs, obs_map): `obs` is a list with one list of ids per
    input line, and `obs_map` maps each cleaned word to its id.
    """
    word_to_id = {}
    encoded_lines = []

    for tokens in lines:
        encoded = []
        for token in tokens:
            cleaned = re.sub(r'[^\w]', '', token).lower()
            # An unseen word receives the next free id, which equals the
            # current size of the map; a known word keeps its existing id.
            encoded.append(word_to_id.setdefault(cleaned, len(word_to_id)))
        encoded_lines.append(encoded)

    return encoded_lines, word_to_id

In [8]:
# Encode every poem line as integer ids; obs_map maps cleaned word -> id.
obs, obs_map = parse_observations(lines)

In [9]:
# get syllable info from syllable_dictionary.txt
def load_syllables(filename):
    """Read a whitespace-separated syllable file into a dict.

    Each non-blank line is split on whitespace. The first token is the
    word and the remaining tokens (kept as strings) are stored as its
    entry. Blank lines are skipped. If a word appears more than once, the
    last occurrence wins.

    Parameters
    ----------
    filename : str
        Path of the syllable dictionary file.

    Returns
    -------
    dict
        Maps word -> list of the remaining tokens on that word's line.
    """
    syllable = {}
    # The context manager closes the file even if parsing raises.
    with open(filename, 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                # Blank line: there is no word to index, so skip it
                # instead of raising IndexError on fields[0].
                continue
            syllable[fields[0]] = fields[1:]
    return syllable

In [10]:
# Relative path to the syllable dictionary; resolved against the notebook's working directory.
filename = "data/Syllable_dictionary.txt"
# syllable: word -> list of the remaining tokens on that word's line.
syllable = load_syllables(filename)

In [11]:
def save_HMM(hmmmmmm, filename):
    """Serialise an HMM to the text file `filename + ".txt"`.

    File layout, one item per line:
      1. str(hmmmmmm.L)
      2. str(hmmmmmm.D)
      3. each row of hmmmmmm.A, values joined by commas
      4. each row of hmmmmmm.O, values joined by commas

    Returns None.
    """
    with open(filename+".txt", "w+") as out:

        def _write_rows(matrix):
            # One comma-separated line per row; an empty row yields an empty line.
            for row in matrix:
                out.write(",".join(str(value) for value in row)+"\n")

        out.write(str(hmmmmmm.L)+"\n")
        out.write(str(hmmmmmm.D)+"\n")
        _write_rows(hmmmmmm.A)
        _write_rows(hmmmmmm.O)
        

def read_HMM(filename):
    """Load an HMM from the text file `filename + ".txt"`.

    Expected layout: an integer L on the first line, an integer D on the
    second (parsed but not otherwise used here), then L comma-separated
    rows of floats for A, followed by L comma-separated rows of floats
    for O.

    Returns HiddenMarkovModel(A, O).
    """
    def _parse_row(text):
        # Comma-separated floats; float() tolerates the trailing newline.
        return [float(token) for token in text.split(",")]

    with open(filename+".txt", "r") as source:
        n_states = int(source.readline())
        # Consume and validate the D line; the value itself is not needed.
        int(source.readline())
        transition = [_parse_row(source.readline()) for _ in range(n_states)]
        emission = [_parse_row(source.readline()) for _ in range(n_states)]
    return HiddenMarkovModel(transition, emission)

In [12]:
# Reads "40-iter-8-hidden-hmm.txt" (read_HMM appends ".txt").
# NOTE(review): no cell visible in this notebook trains or saves this model, so the
# file must already exist on disk for Restart & Run All to succeed.
testHMM40 = read_HMM("40-iter-8-hidden-hmm")

In [13]:
# id -> word lookup, used below to decode generated emissions back into words.
obs_map_r = obs_map_reverser(obs_map)

In [14]:
# Generate and print a 14-line poem with generate_emission_syllables.
for i in range(14): # each poem is 14 lines long
    emission = testHMM40.generate_emission_syllables(10, obs_map_r, syllable) # NOTE(review): 10 looks like a per-line syllable budget rather than a word count (the printed lines vary in word count) — confirm in HMM.py
    # emission[0] is iterated as word ids. The comprehension reuses the name `i`, which is harmless because the outer loop rebinds `i` from range() on every iteration.
    sentence = [obs_map_r[i] for i in emission[0]]

    print(' '.join(sentence).capitalize())

Be and for ten bring all hast have my makes
On part shall the of frame thy with change thou
O but strange are my face state the my rose
Than to no say all have is woe grow let
The the that first on mightier canker hide
Line i and stole thou is worth would show woe
I father what like seeing is be may
Thy eyes to of changing rose for glance to
As bosom yore deserved wights pleasure graces
They far i i shows bad antique i be
Heart purpose from for my name from bright full
Place believe not a general are and my
For o hand it nor or o mind owes so
Resembling how he more grace born where that


In [15]:
# Generate and print a 14-line poem with generate_emission_syllables_other.
for i in range(14): # each poem is 14 lines long
    emission = testHMM40.generate_emission_syllables_other(10, obs_map_r, syllable) # NOTE(review): 10 looks like a per-line syllable budget rather than a word count (the printed lines vary in word count) — confirm in HMM.py
    # emission[0] is iterated as word ids. The comprehension reuses the name `i`, which is harmless because the outer loop rebinds `i` from range() on every iteration.
    sentence = [obs_map_r[i] for i in emission[0]]

    print(' '.join(sentence).capitalize())

Sea mine past are ere seem wights user his
It for did sovereign hours my day now both
Back thou credit a more record wealth deaths
Your of this thou as else as gracious
I antiquity my you addition
Of my thou a treasure to a to his
Unbless true true it it thee my sweets well
Forbear when although that by black thou doth
We store better most my love love public
Face or thee and heals critic confined which
Under be fair well by holds being see so
My then in cost or you true me of his
Thy with that none sheds informer map which
Single for the of with i self shall hot


In [16]:
# Generate and print a 14-line poem with generate_emission_syllables_correct.
for i in range(14): # each poem is 14 lines long
    emission = testHMM40.generate_emission_syllables_correct(10, obs_map_r, syllable) # NOTE(review): 10 looks like a per-line syllable budget rather than a word count (the printed lines vary in word count) — confirm in HMM.py
    # emission[0] is iterated as word ids. The comprehension reuses the name `i`, which is harmless because the outer loop rebinds `i` from range() on every iteration.
    sentence = [obs_map_r[i] for i in emission[0]]

    print(' '.join(sentence).capitalize())

Sleep i in with of the confounds times from
Of he do true things augurs bars and brought
Thy alone wanting make profaned from this
No a kill of is the am thereof thee
The often titles flower inward self thy
Hath art i truth the those have praise razed self
On and doing but have seen famished his
Gilding left his this proud you home of thou
Wrong right hours vowing of this be yet come
Nor born overturn she threw a by that
Others forth doth centre nor work that eye
Odour strikes paying the to the death or
As against my proud it self scythe to for
Low as half sheaves not gone to fester would
