In [20]:
import numpy as np
import pandas as pd
import keras
import eng_to_ipa as ipa
from pronouncing import rhymes

Using TensorFlow backend.


In [21]:
# Load the poem corpus from the CSV file, using its first column as the row index,
# and preview the first rows.
df = pd.read_csv('Multiple_Forms.csv', index_col = 0)
df.head()

Unnamed: 0,Author,Poem,Form
0,William Shakespeare,"From fairest creatures we desire increase,\r\n...",Sonnet
1,William Shakespeare,"When forty winters shall besiege thy brow,\r\n...",Sonnet
2,William Shakespeare,Look in thy glass and tell the face thou viewe...,Sonnet
3,William Shakespeare,"Unthrifty loveliness, why dost thou spend\r\n ...",Sonnet
4,William Shakespeare,"Those hours, that with gentle work did frame\r...",Sonnet


In [22]:
# Select the 'Poem' column; the following cells build the training text from it.
poems = df['Poem']
poems.head(20)

0     From fairest creatures we desire increase,\r\n...
1     When forty winters shall besiege thy brow,\r\n...
2     Look in thy glass and tell the face thou viewe...
3     Unthrifty loveliness, why dost thou spend\r\n ...
4     Those hours, that with gentle work did frame\r...
5     Then let not winter's ragged hand deface,\r\n ...
6     Lo! in the orient when the gracious light\r\n ...
7     Music to hear, why hear'st thou music sadly?\r...
8     Is it for fear to wet a widow's eye,\r\n  That...
9     For shame! deny that thou bear'st love to any,...
10    As fast as thou shalt wane, so fast thou grow'...
11    When I do count the clock that tells the time,...
12    O! that you were your self; but, love you are\...
13    Not from the stars do I my judgement pluck;\r\...
14    When I consider every thing that grows\r\n  Ho...
15    But wherefore do not you a mightier way\r\n  M...
16    Who will believe my verse in time to come,\r\n...
17    Shall I compare thee to a summer's day?\r\

In [36]:
# Concatenate every poem into one corpus string, each followed by a newline.
# str() is applied to every entry, so non-string values (presumably missing
# poems, which render as 'nan') are kept as text rather than raising.
text = ''.join(str(poem) + '\n' for poem in poems)
len(text)

780446

In [38]:
# Normalise punctuation so the later whitespace split yields cleaner word tokens.
# Order matters: every substitution runs on the result of the previous one.
_replacements = [
    ("'d", "ed"),   # expand elided past tenses, e.g. "gaz'd" -> "gazed"
    ("(", ""),
    (")", ""),
    (" '", " "),    # opening single quote
    ("' ", " "),    # closing single quote
    ('"', ''),
    ("--", " "),
    (":-", "."),
    ("-:", "."),
    (".-", "."),
    ("- ", " "),
    (" -", " "),
    (" –", " "),    # en dash
]
for old, new in _replacements:
    text = text.replace(old, new)

# A single replace() pass can create new matches (" . . " -> " . "), so repeat
# until none are left.
while " . " in text:
    text = text.replace(" . ", " ")
while ".." in text:
    text = text.replace("..", ".")

text = text.replace("?.", "? ")
text = text.replace("!.", "! ")

# Preview only the start of the fully cleaned corpus: printing the whole string
# floods the notebook, and the preview now reflects every replacement above.
print(text[:1000])

From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But as the riper should by time decease,
  His tender heir might bear his memory:
  But thou, contracted to thine own bright eyes,
  Feed'st thy light's flame with self-substantial fuel,
  Making a famine where abundance lies,
  Thyself thy foe, to thy sweet self too cruel:
  Thou that art now the world's fresh ornament,
  And only herald to the gaudy spring,
  Within thine own bud buriest thy content,
  And tender churl mak'st waste in niggarding:
        Pity the world, or else this glutton be,
        To eat the world's due, by the grave and thee.
When forty winters shall besiege thy brow,
  And dig deep trenches in thy beauty's field,
  Thy youth's proud livery so gazed on now,
  Will be a tattered weed of small worth held:
  Then being asked, where all thy beauty lies,
  Where all the treasure of thy lusty days;
  To say, within thine own deep sunken eyes,
  Were an all-e

In [57]:
# Split the corpus on whitespace and trim stray quote marks / hyphens from both
# ends of every token, then filter in a single pass instead of popping from the
# list while indexing into it (each pop shifts the whole tail of the list).
# Dropped tokens:
#   - 'nan': presumably the text of missing poems after str() conversion
#   - '':    tokens that consisted only of the stripped characters, which would
#            otherwise become an empty "word" in the vocabulary
stripped_tokens = (token.strip("“”''-") for token in text.split())
word_list = [token for token in stripped_tokens if token and token != 'nan']
len(word_list)

137373

In [58]:
# Show a bounded preview; printing the full token list floods the notebook.
word_list[:50]



In [59]:
maxlen = 6   # number of context words per training sample
step = 1     # stride between the starts of consecutive windows

# Slide a window of `maxlen` words over the corpus; the word immediately after
# each window is its prediction target.
window_starts = range(0, len(word_list) - maxlen, step)
sentences = [word_list[start: start + maxlen] for start in window_starts]
next_words = [word_list[start + maxlen] for start in window_starts]

print('Number of sequences:', len(sentences))

words = sorted(set(word_list))
print('Unique words:', len(words))
# enumerate() yields each word's position directly; calling words.index() for
# every word rescans the list and is quadratic in the vocabulary size.
word_indices = {word: index for index, word in enumerate(words)}

print('Vectorization...')
# The builtin `bool` is used because the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
# NOTE(review): x holds len(sentences) * maxlen * len(words) one-byte flags
# (roughly 20 GB for the sizes printed by this cell) -- consider integer word
# indices with an Embedding layer if memory is a problem.
x = np.zeros((len(sentences), maxlen, len(words)), dtype=bool)
y = np.zeros((len(sentences), len(words)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, word in enumerate(sentence):
        x[i, t, word_indices[word]] = 1
    y[i, word_indices[next_words[i]]] = 1

Number of sequences: 137367
Unique words: 24507
Vectorization...


In [60]:
from keras import layers

# Word-level language model: one LSTM layer reads `maxlen` one-hot encoded
# words, and a softmax layer scores every vocabulary word as the next word.
model = keras.models.Sequential([
    layers.LSTM(128, input_shape=(maxlen, len(words))),
    layers.Dense(len(words), activation='softmax'),
])

In [61]:
# Categorical cross-entropy matches the one-hot next-word targets in `y`.
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

In [62]:
def sample(preds, temperature=1.0):
    """Draw one index from `preds` after temperature re-weighting.

    The weights are raised to the power ``1 / temperature`` and renormalised,
    so values below 1 sharpen the distribution and values above 1 flatten it.

    Parameters
    ----------
    preds : array-like of non-negative weights. They do not need to sum to 1
        (the caller also passes rhyme-masked, unnormalised predictions).
    temperature : float. ``temperature <= 0`` selects the largest weight
        deterministically instead of dividing by zero.

    Returns
    -------
    int : the sampled index into `preds`.

    Raises
    ------
    ValueError : if `preds` contains no positive finite weight to sample from.
    """
    preds = np.asarray(preds, dtype='float64')
    if temperature <= 0:
        # Limit of the tempered distribution as temperature -> 0: greedy choice.
        return int(np.argmax(preds))

    # log(0) = -inf is the intended result for zero-weight entries (they end up
    # with probability 0), so silence the divide-by-zero warning for it.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    peak = np.max(logits)
    if not np.isfinite(peak):
        raise ValueError('preds must contain at least one positive finite weight')

    # Shifting by the maximum keeps the largest term at exp(0) = 1, so small
    # temperatures can no longer underflow every term to 0 (0 / 0 -> NaN).
    exp_preds = np.exp(logits - peak)
    probabilities = exp_preds / np.sum(exp_preds)
    draw = np.random.multinomial(1, probabilities, 1)
    return int(np.argmax(draw))

In [63]:
import random
import sys
# Generate rhyming couplets of roughly ten syllables per line from the model.
#
# State shared across the whole run (not reset between epochs or temperatures):
#   new_rhyme - 1 while the first line of a couplet is being written, 0 while
#               the second line (which should rhyme with the first) is written.
#   count     - running syllable count of the current line; a line is
#               considered finished once it reaches 10.
#   z         - per-vocabulary-word weights, assigned when the first line of a
#               couplet ends: 1 for words rhyming with that line's last word,
#               0.0001 otherwise. It is only read while new_rhyme == 0, which
#               can only happen after it has been assigned.
new_rhyme = 1

generated_text = ''
count = 0

for epoch in range(1, 16):
    generated_text = ''
    generated_list = []
    print('\nepoch', epoch)
    # NOTE(review): the training call below is commented out, so this loop does
    # not fit the model itself -- confirm the model was trained elsewhere.
#     model.fit(x, y, batch_size=128, epochs=1)                       
    # Seed the generator with `maxlen` consecutive words from a random position.
    start_index = random.randint(0, len(word_list) - maxlen - 1)         
    generated_list = word_list[start_index: start_index + maxlen]
#     generated_list = ['The', 'cat', 'in', 'hat']
    for word in generated_list:
        generated_text += word + ' '
    generated_text =  generated_text.strip()
    print('--- Generating with seed: "' + generated_text + '"')
    # The seed's syllables count towards the first line. For multi-word text
    # ipa.syllable_count is iterated here, i.e. it yields one count per word.
    # NOTE(review): `count` still carries the value left by the previous epoch.
    for num in ipa.syllable_count(generated_text):
        count += num
    # NOTE(review): `generated_list` is not reset per temperature, so every
    # temperature after the first continues from the previous one's last words
    # even though the original seed text is printed again.
    for temperature in [0.2, 0.5, 1.0, 1.2]:                        
        print('------ temperature:', temperature)
        sys.stdout.write(generated_text)

        for i in range(400):                                        
            # One-hot encode the current context window for the model.
            sampled = np.zeros((1, maxlen, len(words)))             
            for t, word in enumerate(generated_list):               
                sampled[0, t, word_indices[word]] = 1.              

            # While writing the rhyming line, at 7, 8 or 9 syllables the
            # predictions are multiplied by `z` to favour rhyming words. At 7
            # (resp. 8) syllables a sampled word with <= 2 (resp. <= 1)
            # syllables is discarded and an unweighted word is drawn instead --
            # presumably because it would not finish the ten-syllable line.
            if count == 7 and new_rhyme == 0:
                preds = model.predict(sampled, verbose=0)[0] * z
                next_index = sample(preds, temperature)                 
                next_word = words[next_index]
                if ipa.syllable_count(next_word) <= 2:
                    preds = model.predict(sampled, verbose=0)[0]
                    next_index = sample(preds, temperature)                 
                    next_word = words[next_index] 
            elif count == 8 and new_rhyme == 0:
                preds = model.predict(sampled, verbose=0)[0] * z
                next_index = sample(preds, temperature)                 
                next_word = words[next_index]
                if ipa.syllable_count(next_word) <= 1:
                    preds = model.predict(sampled, verbose=0)[0]
                    next_index = sample(preds, temperature)                 
                    next_word = words[next_index] 
            elif count == 9 and new_rhyme == 0:
                preds = model.predict(sampled, verbose=0)[0] * z
                next_index = sample(preds, temperature)                 
                next_word = words[next_index]
            else:
                preds = model.predict(sampled, verbose=0)[0]
                next_index = sample(preds, temperature)                 
                next_word = words[next_index]

            # Slide the context window forward by one word.
            generated_list.append(next_word)
            generated_list = generated_list[1:]


            # The line break for a finished line is written lazily, just before
            # the first word of the next line.
            if count >= 10:
                sys.stdout.write('\n')
                count = 0
            sys.stdout.write(' ' + next_word)
            # When the library reports 0 syllables (presumably an unknown
            # word), estimate the count from the word length instead.
            if ipa.syllable_count(next_word) == 0:
                if len(next_word) >= 9:
                    count += 3
                elif len(next_word) >= 6:
                    count += 2
                elif len(next_word) == 1:
                    count += 0
                else:
                    count += 1
            else:
                count += ipa.syllable_count(next_word)
            # End of line reached: advance the couplet state.
            if count >= 10:
                new_rhyme += 1
            if count >= 10:
                if new_rhyme >= 2:
                    # First line of a couplet just ended: build the rhyme
                    # weights from its last word for use on the next line.
                    z = np.zeros(len(words)) + 0.0001
#                     print("\nNext Word:", next_word)
                    r_word = next_word[:].lower()
                    r_word = r_word.strip(",.?!:;-'\"_")
                    # NOTE(review): 'and' is replaced by 'zand'; the reason is
                    # not evident from this code -- confirm the intent.
                    if r_word == 'and':
                        r_word = 'zand'
                    else:    
                        # Drop leading characters until the remaining suffix
                        # has known rhymes or nothing is left.
                        while rhymes(r_word) == [] and len(r_word) > 0:
                            r_word = r_word[1:]
#                             print(r_word)
                    rhymers = rhymes(r_word)
#                     print(rhymers)
                    for rhyme in rhymers:
                        if rhyme in word_indices:
                            z[word_indices[rhyme]] = 1
                    new_rhyme = 0
                    # `h` is only referenced by the commented-out debug print.
                    h = z - 0.0001
#                     print(h.sum())
                else:
                    # Second line of a couplet just ended: extra newline, which
                    # leaves a blank line between couplets.
                    sys.stdout.write('\n')


epoch 1
--- Generating with seed: "and Marlbro stay at home. With"
------ temperature: 0.2
and Marlbro stay at home. With Augur refuse, quickly
 Rudiger gav'st breast-flowers sickly

 Jugglers unripe pleasure! armour images
 due? desolation! Etna quality;

 made. Rutulia breezes disabled things? dies.
 scruples trust; strive, lookest thou'lt Neere rise

 slag reeds strand, love-sick dim-discoloured Nelson's
 Exulting Things DARK Hipparchia, why tons

 sometimes corn, alchemy; What exceeded
 lovers a', concording cloth balk adverse

 moist Nymphs, likened blur North Lank waye; breath speed:
 viewing, forbids, honourable; takest flame: indeed

 bonie sick'ning century Resounding
 Humanity's favours curb confounding

 paper; cleare crawl else, fader iron wedding
 publick numbers ago! love-darting dreading

 harsh rescuer end? sendaline Lust-bred
 begot anguish, blode. cost. Tree

KeyboardInterrupt: 

In [102]:
import pronouncing

In [103]:
# Probe how pronouncing's rhyme lookup handles a contraction with an apostrophe.
pronouncing.rhymes('what\'ll')

['nuttal',
 'nuttall',
 'nuttle',
 'rebuttal',
 'rebuttal',
 'ruttle',
 'scuttle',
 'shuttle',
 'subtle',
 'suttle',
 'tuttle',
 'unsubtle']

In [104]:
# Try eng_to_ipa's rhyme lookup on a common word for comparison.
ipa.get_rhymes('these')

['abductees',
 'abdulaziz',
 'absentees',
 'abts',
 'adoptees',
 'adorees',
 'agrees',
 'amputees',
 'andries',
 'annamese',
 "apc's",
 'appease',
 'appointees',
 'ashis',
 'attendees',
 "aujourd'hui's",
 'aziz',
 "b's",
 "b.'s",
 'b.s',
 'balinese',
 "be's",
 'beas',
 "bee's",
 'bees',
 'beese',
 'belize',
 'bes',
 "bibi's",
 'brees',
 'breese',
 'breeze',
 'briese',
 'brunjes',
 'burmese',
 "c's",
 "c.'s",
 'c.d.s',
 'c.s',
 'cadiz',
 'cantonese',
 "capri's",
 'cds',
 'cees',
 'cheese',
 'chemise',
 'chinese',
 'conferees',
 'congolese',
 'crees',
 "d's",
 "d.'s",
 'd.s',
 'deas',
 'decrees',
 "dee's",
 'dees',
 'deese',
 'defrees',
 'defries',
 'degrees',
 'deis',
 'delouis',
 'deportees',
 'designees',
 'detainees',
 'devotees',
 'disagrees',
 'disease',
 'displease',
 'draftees',
 'drees',
 'dreese',
 'dreis',
 'duvrees',
 'dvds',
 "e's",
 "e.'s",
 'e.s',
 'ease',
 'enlistees',
 'enrollees',
 'escapees',
 "esprit's",
 'expertise',
 'faries',
 'fees',
 'feese',
 'fejes',
 'fleas',


In [105]:
# Syllable count via pronouncing: look up the word's pronunciations, then count
# the syllables of the first one listed.
pronunciation_list = pronouncing.phones_for_word("fortitude")
pronouncing.syllable_count(pronunciation_list[0])

3

In [106]:
# Check a word with no rhymes: the lookup returns an empty list here.
ipa.get_rhymes('orange')

[]

In [107]:
# Rhyme lookup with pronouncing for a word that does have rhymes.
pronouncing.rhymes('hinge')

['binge',
 'cringe',
 'fringe',
 'impinge',
 'infringe',
 'inge',
 'klinge',
 'minge',
 'singe',
 'syringe',
 'tinge',
 'twinge',
 'unhinge',
 'vinje',
 'winge']

In [108]:
# Check the punctuation stripping used on rhyme candidates: punctuation and
# quote characters are removed from both ends of the token.
b_word = 'happy."'.strip(",.?!:;-'\"_")
b_word

'happy'

In [109]:
# Inspect eng_to_ipa's transcription of an English word.
ipa.convert('fortunate')

'ˈfɔrʧənət'

In [110]:
# Probe a token that is presumably not an English dictionary word; the result
# shown for it is the input unchanged.
ipa.convert('tu')

'tu'

In [2]:
import nltk
from nltk.corpus import cmudict
# Fetch the CMU Pronouncing Dictionary data (reported as already up-to-date when
# it is present locally).
nltk.download('cmudict')
# `d` is indexed by lowercase word; each entry is a list of pronunciations,
# each of which is a list of phoneme strings such as 'IH1'.
d = cmudict.dict()

[nltk_data] Downloading package cmudict to
[nltk_data]     C:\Users\kyle_\AppData\Roaming\nltk_data...
[nltk_data]   Package cmudict is already up-to-date!


In [5]:
def nsyl(word):
    """Print every pronunciation of `word` and return their syllable counts.

    The word is lower-cased and looked up in the module-level dictionary `d`.
    A phoneme counts as one syllable when its last character is a digit (the
    stress marker carried by vowel phonemes, e.g. 'IH1').

    Returns one count per listed pronunciation. Raises KeyError when the word
    is not in `d`.
    """
    for phones in d[word.lower()]:
        print(phones)
    counts = []
    for phones in d[word.lower()]:
        counts.append(sum(1 for phone in phones if phone[-1].isdigit()))
    return counts

In [18]:
nsyl('think')

['TH', 'IH1', 'NG', 'K']


[1]

In [None]:
def nsyl(word):
    """Return, per pronunciation of `word`, its digit-suffixed phonemes.

    The word is lower-cased and looked up in the module-level dictionary `d`.
    For every listed pronunciation the phonemes whose last character is a
    digit are kept, in order. Raises KeyError when the word is not in `d`.

    NOTE(review): this reuses the name of the earlier `nsyl`, which returns
    counts rather than phoneme lists; running this cell replaces that version.
    """
    digit_phones = []
    for phones in d[word.lower()]:
        digit_phones.append([phone for phone in phones if phone[-1].isdigit()])
    return digit_phones