In [1]:
from __future__ import division, print_function
import numpy as np
from hmmlearn.hmm import MultinomialHMM
import poetrytools
import itertools

In [2]:
%run 'preprocessing.ipynb'

In [3]:
def get_rhymes(words):
    num_words = len(words)
    rhymes = np.empty((num_words, num_words))
    
    for i in range(num_words):
        for j in (k for k in range(num_words) if not k == i):
            if poetrytools.rhymes(words[i], words[j]):
                rhymes[i][j] = 1
    return rhymes

In [4]:
quatrain_rhymes = get_rhymes(quatrain_words)
volta_rhymes = get_rhymes(volta_words)
couplet_rhymes = get_rhymes(couplet_words)

In [6]:
reversed_quatrain_model = MultinomialHMM(n_components=30, n_iter=20, verbose=True)
reversed_volta_model = MultinomialHMM(n_components=30, n_iter=20, verbose=True)
reversed_couplet_model = MultinomialHMM(n_components=30, n_iter=20, verbose=True)
reversed_quatrain_model.fit(np.concatenate(reversed_converted_quatrain_lines), quatrain_lengths)
reversed_volta_model.fit(np.concatenate(reversed_converted_volta_lines), volta_lengths)
reversed_couplet_model.fit(np.concatenate(reversed_converted_couplet_lines), couplet_lengths)

         1      -76043.6485             +nan
         2      -62168.5950      +13875.0535
         3      -62160.2169          +8.3781
         4      -62134.6831         +25.5338
         5      -62058.0582         +76.6249
         6      -61852.4040        +205.6542
         7      -61432.8589        +419.5451
         8      -60868.2977        +564.5613
         9      -60365.9882        +502.3095
        10      -60057.9242        +308.0640
        11      -59913.5757        +144.3485
        12      -59833.9126         +79.6631
        13      -59751.1689         +82.7437
        14      -59618.5991        +132.5697
        15      -59408.8899        +209.7092
        16      -59155.9489        +252.9410
        17      -58933.9952        +221.9537
        18      -58768.6535        +165.3417
        19      -58629.9438        +138.7098
        20      -58477.3471        +152.5966
         1      -35831.0521             +nan
         2      -30165.6166       +5665.4354
         3

MultinomialHMM(algorithm='viterbi', init_params='ste', n_components=30,
        n_iter=20, params='ste',
        random_state=<mtrand.RandomState object at 0x111e745a0>,
        startprob_prior=1.0, tol=0.01, transmat_prior=1.0, verbose=True)

In [9]:
def generate_line(model, words, seed=None, rhymes=None):
    syllables_left = 10
    last_stress = 0
    
    line= []
    start_probs = model.startprob_
    emission_probs = model.emissionprob_
    transition_probs = model.transmat_
    
    start_state = np.random.choice(len(start_probs), p = start_probs)
    
    if seed is not None:
        possible_start_emissions = np.where(rhymes[seed] == 1)
        probs = np.array(emission_probs[start_state][possible_start_emissions])

        scaled_probs = probs / sum(probs)
        while True:
            start_emission = np.random.choice(possible_start_emissions[0], p=scaled_probs)
            start_stress = poetrytools.stress(words[start_emission])
            if int(start_stress[-1]) == 1:
                break
    else:
        start_emission = np.random.choice(len(emission_probs[start_state]), p=emission_probs[start_state])
    
    line.append(start_emission)
    start_stress = poetrytools.stress(words[start_emission])
    syllables_left -= len(start_stress)
    
    if len(start_stress) == 1:
        prev_starting_stress = 1
    else:
        prev_starting_stress = int(start_stress[0])

    curr_state = start_state
    while syllables_left > 0:
        possible_transitions = transition_probs[curr_state]
        curr_state = np.random.choice(len(possible_transitions), p=possible_transitions)
        possible_emissions = emission_probs[curr_state]
        while True:
            curr_emission = np.random.choice(len(possible_emissions), p=possible_emissions)
            curr_stress = poetrytools.stress(words[curr_emission])
            if len(curr_stress) == 1:
                prev_starting_stress = 1 - prev_starting_stress
                syllables_left -= 1
                break
            elif len(curr_stress) > syllables_left or int(curr_stress[-1]) == prev_starting_stress:
                continue
            else:
                prev_starting_stress = int(curr_stress[0])
                syllables_left -= len(curr_stress)
                break
        line.append(curr_emission)

    return line

def convert_line(sample, words):
    ret = ''
    i = 0
    for word in reversed(sample):
        curr_word = words[word]
        if i == 0 or len(curr_word) == 1:
            ret += curr_word.title() + ' '
        else:
            ret += curr_word + ' '
        i += 1
    return ret

def generate_pair(model, words, rhymes):
    while True:
        a_line = generate_line(model, words)
        seed = a_line[0]
        if len(np.where(rhymes[seed] == 1)[0]) > 0:
            b_line = generate_line(model, words, seed, rhymes)
            return a_line, b_line
        
def generate_rhyming_and_meter_sonnet():
    sonnet = ''
    a_lines = []
    b_lines = []
    
    for _ in range(4):
        a_line, b_line = generate_pair(reversed_quatrain_model, quatrain_words, quatrain_rhymes)
        a_lines.append(a_line)
        b_lines.append(b_line)
    
    for i in range(2):
        sonnet += convert_line(a_lines[2 * i], quatrain_words) + '\n'
        sonnet += convert_line(a_lines[2 * i + 1], quatrain_words) + '\n'
        sonnet += convert_line(b_lines[2 * i], quatrain_words) + '\n'
        sonnet += convert_line(b_lines[2 * i + 1], quatrain_words) + '\n'
    
    a_lines = []
    b_lines = []
    
    for _ in range(2):
        a_line, b_line = generate_pair(reversed_volta_model, volta_words, volta_rhymes)
        a_lines.append(a_line)
        b_lines.append(b_line)
    
    sonnet += convert_line(a_lines[0], volta_words) + '\n'
    sonnet += convert_line(a_lines[1], volta_words) + '\n'
    sonnet += convert_line(b_lines[0], volta_words) + '\n'
    sonnet += convert_line(b_lines[1], volta_words) + '\n'
    
    a_line, b_line = generate_pair(reversed_couplet_model, couplet_words, couplet_rhymes)
    sonnet += convert_line(a_line, couplet_words) + '\n'
    sonnet += convert_line(b_line, couplet_words) + '\n'
    
    return sonnet

def generate_10_rhyming_and_meter_sonnets():
    sonnets = ''
    for i in range(10):
        sonnets += str(i) + '\n' + generate_rhyming_and_meter_sonnet() + '\n'
    
    f = open("project2data/rhyming_and_meter_shakespeare.txt","w")
    f.write(sonnets)
    return sonnets

In [10]:
print(generate_10_rhyming_and_meter_sonnets())

0
I that say have confess of two burn ride 
Thy gift that hurt truth mine than thee faces 
Thou good yea follow breath days dare might side 
Love it find A how not on do roses 
The though their and in to the love plants knit 
The honey they the with that beauty filed 
Of good unlearned as love yet from wit 
That which on thou the stay did beauty old 
This own be lines stretched to me beauty men 
My what hate's best be truth known even 
To the impair and and 'gainst now to pen 
Love'S on true of would him their in my been 
Eyes to I you lest all thou dost see love 
Feed to used worse belongs to day so of 

1
Shalt to and as too our in past disgrace 
My bonds cures ruined grace the birds suppressed 
Eyelids and goddess A my happy grace 
To to slave though seen overpartial most 
The win am or thy might do judgement ill 
With weakens doth bond it in woman's chase 
He thee repose signs worth make they to fill 
To to mind pity linger his birds face 
Self rich made not to waste become with pa

In [141]:
# checks if line is in iambic pentameter (i.e. 0101010101 stress pattern)
def check_iambic_pentameter(line):
    # get the stresses from cmu dict 
    # if word is 1 syllable, then have the option for it to be stressed or unstressed
    stresses = []
    for i in line.split(' '):
        stress = poetrytools.stress(i)
        if len(stress) == 1:
            stresses.append(['0','1'])
        else:
            stresses.append([stress])
    
    # make combination of all possible stress patterns
    result = [[]]
    final = []
    for pool in stresses:
        result = [x+[y] for x in result for y in pool]
    final = [''.join(i) for i in result]
    
    # return if any pattern fits iambic pentameter 
    return ('0101010101' in final)