In [1]:
from __future__ import division, print_function
import numpy as np
from hmmlearn.hmm import MultinomialHMM
import poetrytools
import itertools

In [2]:
%run 'preprocessing.ipynb'

In [3]:
def get_rhymes(words):
    num_words = len(words)
    rhymes = np.empty((num_words, num_words))
    
    for i in range(num_words):
        for j in (k for k in range(num_words) if not k == i):
            if poetrytools.rhymes(words[i], words[j]):
                rhymes[i][j] = 1
    return rhymes

In [4]:
quatrain_rhymes = get_rhymes(quatrain_words)
volta_rhymes = get_rhymes(volta_words)
couplet_rhymes = get_rhymes(couplet_words)

In [35]:
reversed_quatrain_model = MultinomialHMM(n_components=30, n_iter=50, verbose=True)
reversed_volta_model = MultinomialHMM(n_components=30, n_iter=50, verbose=True)
reversed_couplet_model = MultinomialHMM(n_components=30, n_iter=50, verbose=True)
reversed_quatrain_model.fit(np.concatenate(reversed_converted_quatrain_lines), quatrain_lengths)
reversed_volta_model.fit(np.concatenate(reversed_converted_volta_lines), volta_lengths)
reversed_couplet_model.fit(np.concatenate(reversed_converted_couplet_lines), couplet_lengths)

         1      -76127.4587             +nan
         2      -62169.0164      +13958.4424
         3      -62161.6852          +7.3311
         4      -62139.8518         +21.8334
         5      -62075.8032         +64.0485
         6      -61903.4710        +172.3323
         7      -61525.6623        +377.8087
         8      -60947.1629        +578.4994
         9      -60391.2399        +555.9230
        10      -60054.9712        +336.2688
        11      -59898.7616        +156.2096
        12      -59814.0838         +84.6778
        13      -59734.7823         +79.3015
        14      -59623.9225        +110.8598
        15      -59458.5824        +165.3401
        16      -59238.7651        +219.8173
        17      -59001.2101        +237.5549
        18      -58790.0781        +211.1321
        19      -58605.5207        +184.5574
        20      -58404.3310        +201.1897
        21      -58134.2876        +270.0433
        22      -57753.6637        +380.6239
        23

MultinomialHMM(algorithm='viterbi', init_params='ste', n_components=30,
        n_iter=50, params='ste',
        random_state=<mtrand.RandomState object at 0x111e745a0>,
        startprob_prior=1.0, tol=0.01, transmat_prior=1.0, verbose=True)

In [36]:
def generate_line(model, words, seed=None, rhymes=None):
    syllables_left = 10
    last_stress = 0
    
    line= []
    start_probs = model.startprob_
    emission_probs = model.emissionprob_
    transition_probs = model.transmat_
    
    start_state = np.random.choice(len(start_probs), p = start_probs)
    
    if seed is not None:
        possible_start_emissions = np.where(rhymes[seed] == 1)
        probs = np.array(emission_probs[start_state][possible_start_emissions])

        scaled_probs = probs / sum(probs)
        while True:
            start_emission = np.random.choice(possible_start_emissions[0], p=scaled_probs)
            start_stress = poetrytools.stress(words[start_emission])
            if len(start_stress) == 1 or int(start_stress[-1]) == 1 :
                break
    else:
        while True:
            start_emission = np.random.choice(len(emission_probs[start_state]), p=emission_probs[start_state])
            start_stress = poetrytools.stress(words[start_emission])
            if len(start_stress) == 1 or int(start_stress[-1]) == 1:
                break
    
    line.append(start_emission)
    start_stress = poetrytools.stress(words[start_emission])
    syllables_left -= len(start_stress)
    
    if len(start_stress) == 1:
        prev_starting_stress = 1
    else:
        prev_starting_stress = int(start_stress[0])

    curr_state = start_state
    while syllables_left > 0:
        possible_transitions = transition_probs[curr_state]
        curr_state = np.random.choice(len(possible_transitions), p=possible_transitions)
        possible_emissions = emission_probs[curr_state]
        while True:
            curr_emission = np.random.choice(len(possible_emissions), p=possible_emissions)
            curr_stress = poetrytools.stress(words[curr_emission])
            if len(curr_stress) == 1:
                prev_starting_stress = 1 - prev_starting_stress
                syllables_left -= 1
                break
            elif len(curr_stress) > syllables_left or int(curr_stress[-1]) == prev_starting_stress:
                continue
            else:
                prev_starting_stress = int(curr_stress[0])
                syllables_left -= len(curr_stress)
                break
        line.append(curr_emission)

    return line

def convert_line(sample, words):
    ret = ''
    i = 0
    for word in reversed(sample):
        curr_word = words[word]
        if i == 0 or (curr_word == 'i'):
            ret += curr_word.title() + ' '
        else:
            ret += curr_word + ' '
        i += 1
    return ret

def generate_pair(model, words, rhymes):
    while True:
        a_line = generate_line(model, words)
        seed = a_line[0]
        if len(np.where(rhymes[seed] == 1)[0]) > 0:
            b_line = generate_line(model, words, seed, rhymes)
            return a_line, b_line
        
def generate_rhyming_and_meter_sonnet():
    sonnet = ''
    a_lines = []
    b_lines = []
    
    for _ in range(4):
        a_line, b_line = generate_pair(reversed_quatrain_model, quatrain_words, quatrain_rhymes)
        a_lines.append(a_line)
        b_lines.append(b_line)
    
    for i in range(2):
        sonnet += convert_line(a_lines[2 * i], quatrain_words) + '\n'
        sonnet += convert_line(a_lines[2 * i + 1], quatrain_words) + '\n'
        sonnet += convert_line(b_lines[2 * i], quatrain_words) + '\n'
        sonnet += convert_line(b_lines[2 * i + 1], quatrain_words) + '\n'
    
    a_lines = []
    b_lines = []
    
    for _ in range(2):
        a_line, b_line = generate_pair(reversed_volta_model, volta_words, volta_rhymes)
        a_lines.append(a_line)
        b_lines.append(b_line)
    
    sonnet += convert_line(a_lines[0], volta_words) + '\n'
    sonnet += convert_line(a_lines[1], volta_words) + '\n'
    sonnet += convert_line(b_lines[0], volta_words) + '\n'
    sonnet += convert_line(b_lines[1], volta_words) + '\n'
    
    a_line, b_line = generate_pair(reversed_couplet_model, couplet_words, couplet_rhymes)
    sonnet += convert_line(a_line, couplet_words) + '\n'
    sonnet += convert_line(b_line, couplet_words) + '\n'
    
    return sonnet

def generate_10_rhyming_and_meter_sonnets():
    sonnets = ''
    for i in range(10):
        print('Generating Sonnet ' + str(i + 1))
        sonnets += str(i) + '\n' + generate_rhyming_and_meter_sonnet() + '\n'
    
    f = open("project2data/rhyming_and_meter_shakespeare.txt","w")
    f.write(sonnets)
    return sonnets

In [38]:
print(generate_10_rhyming_and_meter_sonnets())

Generating Sonnet1
Generating Sonnet2
Generating Sonnet3
Generating Sonnet4
Generating Sonnet5
Generating Sonnet6
Generating Sonnet7
Generating Sonnet8
Generating Sonnet9
Generating Sonnet10
0
My speed hath all filled and thy shouldst redeem 
Nor my rose of a height that far have year 
For see more should mock me thing and the deem 
Such the bear though their time when since your near 
Smell thoughts my odour of life worth's her heart 
Pen the darkening will your face to you 
From fingers me my able as resort 
Leads my rose to every siren due 
Crowning strong blessed at is truetelling strong 
Have guess that me from base look eyes my doom 
Those beauty's say dissuade on my belong 
Sworn to triumphant and one straying room 
That then me keep but virtue thy to heart 
Pays sing for live else suns my love in sort 

1
Boughs I that sweet least might you leaves redeem 
And for I breath such self of his excess 
That sick forbid are with give did the deem 
Or which knows you than see did it we



In [141]:
# checks if line is in iambic pentameter (i.e. 0101010101 stress pattern)
def check_iambic_pentameter(line):
    # get the stresses from cmu dict 
    # if word is 1 syllable, then have the option for it to be stressed or unstressed
    stresses = []
    for i in line.split(' '):
        stress = poetrytools.stress(i)
        if len(stress) == 1:
            stresses.append(['0','1'])
        else:
            stresses.append([stress])
    
    # make combination of all possible stress patterns
    result = [[]]
    final = []
    for pool in stresses:
        result = [x+[y] for x in result for y in pool]
    final = [''.join(i) for i in result]
    
    # return if any pattern fits iambic pentameter 
    return ('0101010101' in final)