In [11]:
import random
import re
import os
from HMM import unsupervised_HMM
from punctuation_dict import get_punctuation_dict
from punctuation_generator import get_punc
from syllable_dict import get_syllable_dict

In [12]:
# Load the whole corpus into memory. The context manager guarantees the file
# handle is closed as soon as the read finishes instead of leaking it until
# garbage collection.
with open(os.path.join(os.getcwd(), 'data/shakespeare.txt')) as corpus_file:
    text = corpus_file.read()

In [15]:
# Words that process_word() re-capitalizes. They are compared against the
# value looked up in punc_dict, so the entries here are lowercase.
cap_words = ["i'll", 'i', 'o']
# Lookup table that process_word() indexes with the lowercased word stripped of
# every non-word character.
# NOTE(review): presumably maps each stripped form back to its spelling with
# in-word punctuation (e.g. apostrophes) — confirm in punctuation_dict.
punc_dict = get_punctuation_dict()

def process_word(word):
    '''
    Normalize a raw token from the corpus.

    The token is lowercased, every character that is not a regex word
    character is removed, and the result is looked up in punc_dict. If the
    looked-up word is listed in cap_words it is capitalized again.

    The exact token "I'll" is returned unchanged, because stripping its
    apostrophe would make it indistinguishable from "ill".

    Raises whatever punc_dict raises for an unknown stripped key.
    '''
    if word != "I'll":
        # Lowercase first, then drop punctuation that is not part of the word
        stripped = re.sub(r'[^\w]', '', word.lower())
        word = punc_dict[stripped]

        # Restore the capital letter on the words that must keep it
        if word in cap_words:
            word = word.capitalize()

    return word

In [41]:
# Create rhyme dictionary

# Tokenize every non-blank line of the corpus into a list of words
lines = [tokens for tokens in map(str.split, text.split('\n')) if tokens]
sonnets = []
sonnet = []
for line in lines:
    if len(line) == 1:
        # A single-token line is treated as a sonnet header: it closes the
        # sonnet collected so far. Only store sonnets with 14 lines, since the
        # rhyme extraction below indexes lines 0..13.
        if len(sonnet) == 14:
            sonnets.append(sonnet)
        sonnet = []
        continue
    sonnet.append(line)

# A sonnet is only flushed when the *next* header is seen, so the last sonnet
# in the text is still pending here unless the text ends with a header line.
# Flush it so that it is not silently dropped.
if len(sonnet) == 14:
    sonnets.append(sonnet)

    
# The rhyme dictionary is a list of sets; every word inside one set rhymes
# with every other word in that same set.
rhyme_dict = []


def add_to_rhyme_dict(w1, w2):
    '''
    Record that w1 and w2 rhyme.

    If one of the words already belongs to a group, the other word joins that
    group. If the two words belong to two different groups, the later group is
    merged into the earlier one and removed from rhyme_dict. If neither word is
    known yet, a new group holding both words is appended.
    '''
    # The first group found to contain either word, if any
    home = None

    for bucket in rhyme_dict:
        # Work out which of the two words this bucket is missing (w1 is
        # checked first, exactly as a w1-then-w2 if/elif chain would)
        if w1 in bucket:
            partner = w2
        elif w2 in bucket:
            partner = w1
        else:
            continue

        if not home:
            bucket.add(partner)
            home = bucket
            continue

        # A second matching bucket: fold it into the first and drop it.
        # Stop right away, because the list was just modified mid-iteration.
        home.update(bucket)
        rhyme_dict.remove(bucket)
        break

    if not home:
        rhyme_dict.append({w1, w2})
        
        
for sonnet in sonnets:
    # Line-index pairs whose last words rhyme: alternating lines inside each of
    # the three quatrains (ABAB CDCD EFEF), then the closing couplet (GG)
    for i, j in [(0, 2), (1, 3), (4, 6), (5, 7), (8, 10), (9, 11), (12, 13)]:
        word1 = process_word(sonnet[i][-1])
        word2 = process_word(sonnet[j][-1])
        add_to_rhyme_dict(word1, word2)

In [42]:
def parse_observations(text):
    '''
    Encode the corpus as integer sequences for HMM training.

    Every line with more than one token becomes one sequence. Blank lines and
    single-token lines (poem ids) are skipped. The words of a line are
    normalized with process_word and stored in REVERSE order, so the HMM is
    trained on reversed sequences.

    Returns:
        (obs, obs_map): obs is a list of lists of integer word ids, and
        obs_map maps each processed word to its id. Ids are assigned in order
        of first appearance, starting at 0.
    '''
    word_ids = {}
    sequences = []

    for raw_line in text.split('\n'):
        tokens = raw_line.split()

        # Nothing to encode on blank lines; one-token lines are poem ids
        if len(tokens) <= 1:
            continue

        encoded = []
        for token in reversed(tokens):
            cleaned = process_word(token)

            # A new word gets the next free id, which equals the map size
            if cleaned not in word_ids:
                word_ids[cleaned] = len(word_ids)

            encoded.append(word_ids[cleaned])

        sequences.append(encoded)

    return sequences, word_ids

In [43]:
# Encode the corpus: obs holds the integer-encoded (reversed) lines and
# obs_map is the word -> id mapping used to encode them
obs, obs_map = parse_observations(text)
# Syllable lookup handed to the HMM emission generators in the sampling helpers
syl_dict = get_syllable_dict()

In [63]:
# Train the HMM on the encoded lines without labels.
# NOTE(review): the positional arguments look like 20 hidden states and 100
# training iterations (the cell output counts up to "Iteration: 100") —
# confirm against HMM.unsupervised_HMM.
hmm = unsupervised_HMM(obs, 20, 100)

1
2
3
4
5
6
7
8
9
10
Iteration: 10
11
12
13
14
15
16
17
18
19
20
Iteration: 20
21
22
23
24
25
26
27
28
29
30
Iteration: 30
31
32
33
34
35
36
37
38
39
40
Iteration: 40
41
42
43
44
45
46
47
48
49
50
Iteration: 50
51
52
53
54
55
56
57
58
59
60
Iteration: 60
61
62
63
64
65
66
67
68
69
70
Iteration: 70
71
72
73
74
75
76
77
78
79
80
Iteration: 80
81
82
83
84
85
86
87
88
89
90
Iteration: 90
91
92
93
94
95
96
97
98
99
100
Iteration: 100


In [89]:
def obs_map_reverser(obs_map):
    '''
    Invert a word -> id mapping into an id -> word mapping.

    If several words share one id, the word that comes last in obs_map's
    iteration order wins.
    '''
    return {index: word for word, index in obs_map.items()}


def sample_sentence_simple(hmm, obs_map, n_syl=10):
    '''
    Sample one line from the HMM with no rhyme constraint.

    hmm.generate_emission is called with the syllable budget n_syl, the
    id -> word map and syl_dict. The emitted ids are decoded to words and
    flipped back into reading order, since the model was trained on reversed
    lines. The first word of the line is capitalized.

    Returns the line as a single space-separated string.
    '''
    id_to_word = obs_map_reverser(obs_map)

    # The second return value (the state sequence) is not needed here
    emission, _states = hmm.generate_emission(n_syl, id_to_word, syl_dict)

    # Decode, then undo the reversed generation order
    words = [id_to_word[idx] for idx in emission]
    words.reverse()

    words[0] = words[0].capitalize()
    return ' '.join(words)


def sample_sentence(hmm, obs_map, word, n_syl=10):
    '''
    Sample one line from the HMM that ends with the given word.

    A state is picked with hmm.find_state for the id of `word`, and
    hmm.generate_emission_rhyme is then asked for an emission from that word
    and state within the syllable budget n_syl. The emitted ids are decoded
    and flipped back into reading order (generation runs over reversed lines),
    and the first word is capitalized.

    Returns the line as a single space-separated string.
    '''
    id_to_word = obs_map_reverser(obs_map)

    # Choose a state that could have generated the word
    start_state = hmm.find_state(obs_map[word])

    # Only the first element of the generator's result (the emission) is used
    result = hmm.generate_emission_rhyme(
        n_syl, obs_map, id_to_word, syl_dict, word, start_state)
    emission = result[0]

    # Decode, then undo the reversed generation order
    words = [id_to_word[idx] for idx in emission]
    words.reverse()

    words[0] = words[0].capitalize()
    return ' '.join(words)


def generate_rhyming_sentences(hmm, obs_map, n_sentences=2, n_syl=10):
    '''
    Generate n_sentences lines whose last words all rhyme with each other.

    A rhyme group from rhyme_dict with at least n_sentences words is chosen at
    random. n_sentences distinct words are drawn from it, and one line ending
    in each word is produced with sample_sentence.

    Args:
        hmm: trained HMM passed through to sample_sentence.
        obs_map: word -> id mapping passed through to sample_sentence.
        n_sentences: number of rhyming lines to produce.
        n_syl: syllable budget for each line.

    Returns:
        A list of n_sentences strings.

    Raises:
        IndexError: from random.choice when no rhyme group is large enough.
    '''
    # Only groups with enough words can supply n_sentences distinct endings
    groups = [group for group in rhyme_dict if len(group) >= n_sentences]

    # random.sample() requires a sequence: sampling directly from a set was
    # deprecated in Python 3.9 and raises TypeError from 3.11. Sorting also
    # makes the draw reproducible for a fixed random seed.
    candidates = sorted(random.choice(groups))
    words = random.sample(candidates, n_sentences)

    return [sample_sentence(hmm, obs_map, word, n_syl) for word in words]

    
def generate_sonnet(hmm, obs_map):
    '''
    Print a 14-line sonnet with rhyme scheme ABAB CDCD EFEF GG.

    Each of the three quatrains uses two independently generated rhyming
    pairs, interleaved. The closing couplet is indented by two spaces. Line
    endings come from get_punc: kind 0 for the first three lines of a
    quatrain, kind 1 for its last line, and kinds 2 and 3 for the couplet.
    '''
    rows = []

    for _ in range(3):
        first, third = generate_rhyming_sentences(hmm, obs_map)
        second, fourth = generate_rhyming_sentences(hmm, obs_map)
        quatrain = ((first, 0), (second, 0), (third, 0), (fourth, 1))
        for body, punc_kind in quatrain:
            rows.append(body + get_punc(punc_kind))

    # Last stanza
    open_line, close_line = generate_rhyming_sentences(hmm, obs_map)
    rows.append('  ' + open_line + get_punc(2))
    rows.append('  ' + close_line + get_punc(3))

    print('\n'.join(rows) + '\n')
    

def generate_haiku(hmm, obs_map):
    '''
    Print a three-line haiku: unrhymed lines of 5, 7 and 5 syllables.
    '''
    for syllables in (5, 7, 5):
        print(sample_sentence_simple(hmm, obs_map, syllables))
    
    
def generate_petrarchan_sonnet(hmm, obs_map):
    '''
    Print a Petrarchan sonnet with rhyming scheme: ABBAABBA CDCDCD

    Four rhyme groups are generated: A and B with four lines each for the
    octave, C and D with three lines each for the sestet. The two stanzas are
    separated by one blank line.
    '''
    a_lines = generate_rhyming_sentences(hmm, obs_map, 4)
    b_lines = generate_rhyming_sentences(hmm, obs_map, 4)
    c_lines = generate_rhyming_sentences(hmm, obs_map, 3)
    d_lines = generate_rhyming_sentences(hmm, obs_map, 3)

    # First stanza: ABBA ABBA
    octave = [a_lines[0], b_lines[0], b_lines[1], a_lines[1],
              a_lines[2], b_lines[2], b_lines[3], a_lines[3]]

    # Second stanza: C and D alternate, CDCDCD
    sestet = [group[i] for i in range(3) for group in (c_lines, d_lines)]

    poem = (''.join(row + '\n' for row in octave) + '\n' +
            ''.join(row + '\n' for row in sestet))
    print(poem)
        

def generate_limerick(hmm, obs_map):
    '''
    Print a limerick with rhyming scheme: AABBA

    The three A lines share one syllable count drawn uniformly from 7..10.
    The two B lines share one drawn uniformly from 5..7.
    '''
    long_syl = random.randint(7, 10)
    short_syl = random.randint(5, 7)

    a_lines = generate_rhyming_sentences(hmm, obs_map, 3, long_syl)
    b_lines = generate_rhyming_sentences(hmm, obs_map, 2, short_syl)

    ordered = (a_lines[0], a_lines[1], b_lines[0], b_lines[1], a_lines[2])
    print('\n'.join(ordered) + '\n')

In [87]:
# Print one generated 14-line sonnet (three quatrains plus an indented couplet)
generate_sonnet(hmm, obs_map)

My worth she honour or proud the disease
Love I not no call beauty's a their used:
So so in dead their show then love in please,
Winds report the honour thou could abused.
My roses hast flies who I you am can,
Lack in a store but as proof that shape slave
Or verse and his mistress of that the man,
Must yet will from him and frailties to crave.
It death change withering in the beauties blood,
Of it and see those but nor to did chips,
The thing for thou my for in thy might brood,
Mayst been to of this were love fearing lips!
  You whose grave from new and tyrant once purge:
  Grounded kingly and their all such hour urge.



In [90]:
# Print one generated haiku (lines of 5, 7 and 5 syllables)
generate_haiku(hmm, obs_map)

Thy are to transport
If so if check love so though
Springs which whether I where


In [70]:
# Print one generated Petrarchan sonnet (ABBAABBA octave, CDCDCD sestet)
generate_petrarchan_sonnet(hmm, obs_map)

Thought my colour verse keeps my crests can made
She love my desire humble to see tomb
Height in like world I knowing strange shall womb
Live delight them in more thee think my shade
Whence thee is so be no and no shouldst fade
My self use lie with compounded of dumb
Fortune's for is beauty maladies come
Give or kind wherein cruel shows since jade

Doth put a perfumed new-fangled impute
Is flattered forth contented in from fair
Lust the their worst thou them women's stop fruit
Past with sweetest name with tiger's repair
On but gilded and since loves of lacked mute
With are charactered rich join with stage air



In [91]:
# Print one generated limerick (AABBA)
generate_limerick(hmm, obs_map)

Worth but lose his wealth not foul slide
O wherein my which hath belied
Thou to your earth be perish
Dost prepare my true cherish
Beauteous love he make an abide

