In [3]:
from __future__ import division, print_function
import numpy as np
from hmmlearn.hmm import MultinomialHMM
import poetrytools
import itertools

In [2]:
# Execute the preprocessing notebook inside this kernel's namespace.
# NOTE(review): the cells below use names that are never defined in this
# notebook (quatrain_words / volta_words / couplet_words, the *_lengths
# sequences and the reversed_converted_*_lines collections) -- presumably this
# run is what defines them; confirm against preprocessing.ipynb.
%run 'preprocessing.ipynb'

In [4]:
def get_rhymes(words):
    """Build a pairwise rhyme-indicator matrix for a list of words.

    Parameters
    ----------
    words : sequence of str
        The words to compare; a word's position is its row/column index.

    Returns
    -------
    numpy.ndarray of float, shape (len(words), len(words))
        Entry ``[i][j]`` is 1 when ``poetrytools.rhymes(words[i], words[j])``
        is truthy and 0 otherwise. The diagonal is always 0 because a word is
        never compared with itself.
    """
    num_words = len(words)
    # np.zeros rather than np.empty: np.empty hands back uninitialised memory,
    # so every cell that is not explicitly set to 1 below (non-rhyming pairs
    # and the diagonal) would otherwise contain an arbitrary value.
    rhymes = np.zeros((num_words, num_words))

    for i in range(num_words):
        for j in range(num_words):
            if i != j and poetrytools.rhymes(words[i], words[j]):
                rhymes[i][j] = 1
    return rhymes

In [5]:
# One rhyme matrix per sonnet section, each computed from that section's own
# word list (quatrains first, then the volta, then the couplet).
quatrain_rhymes, volta_rhymes, couplet_rhymes = map(
    get_rhymes, (quatrain_words, volta_words, couplet_words)
)

In [6]:
# Three independent HMMs with identical hyper-parameters, one per sonnet
# section. verbose=True makes fit() print a per-iteration progress log.
reversed_quatrain_model, reversed_volta_model, reversed_couplet_model = (
    MultinomialHMM(n_components=30, n_iter=20, verbose=True) for _ in range(3)
)

# Each model is fitted on the concatenation of its section's sequences, with
# the matching *_lengths argument telling fit() where each sequence ends.
reversed_quatrain_model.fit(
    np.concatenate(reversed_converted_quatrain_lines), lengths=quatrain_lengths
)
reversed_volta_model.fit(
    np.concatenate(reversed_converted_volta_lines), lengths=volta_lengths
)
# Kept as the final expression so the cell still displays the fitted model.
reversed_couplet_model.fit(
    np.concatenate(reversed_converted_couplet_lines), lengths=couplet_lengths
)

         1      -76099.6745             +nan
         2      -62169.2797      +13930.3948
         3      -62162.2341          +7.0456
         4      -62139.8212         +22.4129
         5      -62065.0901         +74.7310
         6      -61820.7373        +244.3529
         7      -61229.9313        +590.8059
         8      -60517.7265        +712.2048
         9      -60108.3643        +409.3623
        10      -59933.0495        +175.3148
        11      -59832.9117        +100.1377
        12      -59740.5058         +92.4059
        13      -59622.1473        +118.3586
        14      -59452.1301        +170.0172
        15      -59224.0314        +228.0987
        16      -58982.8937        +241.1377
        17      -58783.2113        +199.6824
        18      -58615.3448        +167.8664
        19      -58429.0601        +186.2847
        20      -58171.2201        +257.8400
         1      -35709.5354             +nan
         2      -30166.3691       +5543.1664
         3

MultinomialHMM(algorithm='viterbi', init_params='ste', n_components=30,
        n_iter=20, params='ste',
        random_state=<mtrand.RandomState object at 0x10ea5a140>,
        startprob_prior=1.0, tol=0.01, transmat_prior=1.0, verbose=True)

In [37]:
def generate_line(model, words, seed=None, rhymes=None):
    """Sample one line of word indices from an HMM under a ten-syllable budget.

    The returned list is in sampling order. ``convert_line`` walks the sample
    in reverse, so the first sampled word is printed last; that is why it is
    the word that gets constrained to rhyme.

    ``poetrytools.stress(word)`` is treated as a string with one character per
    syllable. Words are drawn until ten syllables have been used. After the
    first word, a candidate is rejected when it has more syllables than remain
    or when its final stress equals the opening stress of the previously
    accepted word. A one-syllable word is always accepted and is assumed to
    take whichever stress keeps the alternation going. Stresses inside a
    multi-syllable word are not checked, and the first word is not checked
    against the syllable budget.

    Parameters
    ----------
    model : object
        Fitted HMM exposing ``startprob_``, ``emissionprob_`` and ``transmat_``.
    words : sequence of str
        Maps an emission index to its word.
    seed : int, optional
        Index of a word the first sampled word must rhyme with. When omitted
        the first word is drawn from the start state's emission distribution.
    rhymes : array-like, optional
        Square indicator matrix where ``rhymes[seed][k] == 1`` marks word ``k``
        as a rhyme of ``seed``. Required whenever ``seed`` is given.

    Returns
    -------
    list
        Emission indices in the order they were sampled.

    Raises
    ------
    ValueError
        If ``seed`` is given without ``rhymes``, or ``seed`` has no rhymes.
    """
    if seed is not None and rhymes is None:
        raise ValueError("rhymes must be provided when seed is given")

    syllables_left = 10

    start_probs = model.startprob_
    emission_probs = model.emissionprob_
    transition_probs = model.transmat_

    start_state = np.random.choice(len(start_probs), p=start_probs)

    if seed is not None:
        candidates = np.where(rhymes[seed] == 1)[0]
        if len(candidates) == 0:
            raise ValueError("seed word has no rhymes to choose from")
        probs = np.array(emission_probs[start_state][candidates], dtype=float)
        total = probs.sum()
        if total > 0:
            scaled_probs = probs / total
        else:
            # The start state gives no probability mass to any rhyming word;
            # dividing by zero would yield NaNs that np.random.choice rejects,
            # so fall back to a uniform pick among the rhymes.
            scaled_probs = np.array([1.0 / len(candidates)] * len(candidates))
        start_emission = np.random.choice(candidates, p=scaled_probs)
    else:
        start_emission = np.random.choice(len(emission_probs[start_state]),
                                          p=emission_probs[start_state])

    line = [start_emission]
    start_stress = poetrytools.stress(words[start_emission])
    syllables_left -= len(start_stress)

    # Opening stress of the most recently accepted word, always kept as an int
    # so the comparisons below are int-to-int. A one-syllable first word is
    # taken to be stressed.
    if len(start_stress) == 1:
        prev_starting_stress = 1
    else:
        prev_starting_stress = int(start_stress[0])

    curr_state = start_state
    while syllables_left > 0:
        possible_transitions = transition_probs[curr_state]
        curr_state = np.random.choice(len(possible_transitions), p=possible_transitions)
        possible_emissions = emission_probs[curr_state]
        # Rejection-sample from the current state until a word fits the meter.
        while True:
            curr_emission = np.random.choice(len(possible_emissions), p=possible_emissions)
            curr_stress = poetrytools.stress(words[curr_emission])
            if len(curr_stress) == 1:
                # This branch had no body in the original (a syntax error).
                # Reconstructed rule: a one-syllable word always fits and takes
                # the opposite stress of the word accepted before it.
                prev_starting_stress = 0 if prev_starting_stress else 1
                syllables_left -= 1
                break
            if len(curr_stress) > syllables_left or int(curr_stress[-1]) == prev_starting_stress:
                continue
            # int(): storing the raw character here made every later
            # int-vs-str comparison False, which disabled the meter check.
            prev_starting_stress = int(curr_stress[0])
            syllables_left -= len(curr_stress)
            break
        line.append(curr_emission)

    return line

def _capitalize_first_letter(word):
    """Upper-case the first alphabetic character of ``word``; leave the rest untouched."""
    for position, char in enumerate(word):
        if char.isalpha():
            return word[:position] + char.upper() + word[position + 1:]
    return word


def convert_line(sample, words):
    """Turn a sampled list of word indices into a printable line.

    The sample is read back to front. The first printed word and every
    single-character word are capitalised. Each word is followed by one space,
    so a non-empty result ends with a trailing space.

    Capitalisation touches only the first alphabetic character. ``str.title()``
    was used before, but it also upper-cases the letter after an apostrophe or
    hyphen (``"winter's"`` became ``"Winter'S"``).

    Parameters
    ----------
    sample : sequence of int
        Word indices in sampling order (last printed word first).
    words : sequence of str
        Maps a word index to its text.

    Returns
    -------
    str
        The line, or ``''`` for an empty sample.
    """
    pieces = []
    for position, word_index in enumerate(reversed(sample)):
        curr_word = words[word_index]
        if position == 0 or len(curr_word) == 1:
            curr_word = _capitalize_first_letter(curr_word)
        pieces.append(curr_word + ' ')
    return ''.join(pieces)

def generate_pair(model, words, rhymes):
    """Return two sampled lines whose first sampled words rhyme.

    Unconstrained lines are drawn until one starts with a word that has at
    least one entry equal to 1 in its ``rhymes`` row. A second line is then
    sampled with that word as the rhyme seed.

    Returns
    -------
    tuple
        ``(a_line, b_line)`` as returned by ``generate_line``.
    """
    first_line = generate_line(model, words)
    # Keep resampling while the opening word has nothing to rhyme with.
    while np.where(rhymes[first_line[0]] == 1)[0].size == 0:
        first_line = generate_line(model, words)

    anchor = first_line[0]
    second_line = generate_line(model, words, anchor, rhymes)
    return first_line, second_line
        
def generate_rhyming_and_meter_sonnet():
    """Generate one fourteen-line sonnet and return it as a single string.

    Layout: two quatrains from the quatrain model, one four-line volta from
    the volta model, and a closing couplet from the couplet model. Within each
    four-line stanza two rhyming pairs are interleaved, so lines 1/3 and 2/4
    rhyme. Every line is terminated by a newline.
    """
    def interleave(pairs, vocabulary):
        # (a0, b0), (a1, b1) -> a0, a1, b0, b1
        (a_first, b_first), (a_second, b_second) = pairs
        return [convert_line(sampled, vocabulary)
                for sampled in (a_first, a_second, b_first, b_second)]

    rendered = []

    # All four quatrain pairs are sampled before any of them is rendered.
    quatrain_pairs = [
        generate_pair(reversed_quatrain_model, quatrain_words, quatrain_rhymes)
        for _ in range(4)
    ]
    rendered += interleave(quatrain_pairs[:2], quatrain_words)
    rendered += interleave(quatrain_pairs[2:], quatrain_words)

    volta_pairs = [
        generate_pair(reversed_volta_model, volta_words, volta_rhymes)
        for _ in range(2)
    ]
    rendered += interleave(volta_pairs, volta_words)

    couplet_pair = generate_pair(reversed_couplet_model, couplet_words, couplet_rhymes)
    rendered += [convert_line(sampled, couplet_words) for sampled in couplet_pair]

    return ''.join(text + '\n' for text in rendered)

def generate_10_rhyming_and_meter_sonnets():
    """Generate ten sonnets, save them to disk and return the combined text.

    Each sonnet is preceded by a line holding its index (0-9) and followed by
    a blank line. The text is written to
    ``project2data/rhyming_and_meter_shakespeare.txt``, overwriting any
    existing file. The directory must already exist.

    Returns
    -------
    str
        Exactly the text that was written to the file.
    """
    sonnets = ''.join(
        str(i) + '\n' + generate_rhyming_and_meter_sonnet() + '\n'
        for i in range(10)
    )

    # The context manager guarantees the handle is flushed and closed; the
    # original opened the file and never closed it.
    with open("project2data/rhyming_and_meter_shakespeare.txt", "w") as f:
        f.write(sonnets)
    return sonnets

In [38]:
# Generate the ten sonnets (the call also writes them to
# project2data/rhyming_and_meter_shakespeare.txt) and print the returned text.
print(generate_10_rhyming_and_meter_sonnets())

0
Some each fortune forth and admitted time 
Love that determination taker red 
So A my wherein showing coloured rhyme 
Wound the make but and was what with to spread 
Pity can contented his register 
The for thy solemn of it deaths is son 
I not as my frequent his character 
Poverty grey when vulgar the is won 
Having thee read to but thy gentle pride 
Making as I that the the ruminate 
Poor calls the as her thus then winter's side 
I lose be O every advocate 
Tyrants' what rid lived have be monument 
In wonder him are live heed with content 

1
And you on with inherit may forgot 
Joy that eyes being some thou throned born 
And my thou where after another not 
Love in like of canst not holds be forsworn 
Be A you and thou say where love recite 
Feast all and love why his neglected was 
Err of made I in and subjects delight 
And reproach persuade physicians faces 
The eyes then bending not love so A hand 
I or spring makes youth A advocate 
The fame pay hath forebemoaned kind 
And my i

In [141]:
# checks if line is in iambic pentameter (i.e. 0101010101 stress pattern)
def check_iambic_pentameter(line):
    """Return True when ``line`` can be scanned as ``0101010101``.

    Each whitespace-separated word is looked up with ``poetrytools.stress``.
    A word whose stress string has length 1 may count as either stressed or
    unstressed; any other word must match the pattern exactly at the position
    where it falls.

    Parameters
    ----------
    line : str
        Words separated by whitespace. Leading, trailing and repeated
        whitespace is ignored, so text produced by ``convert_line`` (which
        ends with a space) can be passed in directly.

    Returns
    -------
    bool
        True if some assignment of stresses to the one-syllable words makes
        the whole line read ``0101010101``.
    """
    target = '0101010101'

    # split() without an argument never produces empty tokens, unlike
    # split(' '), which would hand '' to the stress lookup for a trailing or
    # doubled space.
    stresses = [poetrytools.stress(word) for word in line.split()]

    # Walk the pattern once instead of enumerating every 0/1 assignment for
    # the one-syllable words: such a word matches whatever the pattern needs
    # at its position, so it just advances the cursor.
    position = 0
    for stress in stresses:
        if len(stress) == 1:
            position += 1
            continue
        end = position + len(stress)
        if target[position:end] != stress:
            return False
        position = end

    return position == len(target)