In [1]:
import pronouncing
from itertools import product
import functools
import operator
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from tqdm import tqdm_notebook as tqdm
import string
import gensim
from multiprocessing import Pool

In [2]:
# Load the pre-trained word vectors used by synonyms(). mmap='r' is passed so the
# arrays are (per the gensim docs) memory-mapped read-only rather than copied into RAM.
# NOTE(review): the absolute path is machine-specific; this cell fails on any other host.
model = gensim.models.KeyedVectors.load('/mnt/bigfiles/dl/datasets/gensim-fasttext-300d-2M', mmap='r')

In [3]:
def remove_punctuation(phrase):
    """Return `phrase` with every character in string.punctuation deleted."""
    # The third argument of str.maketrans lists the characters to drop.
    deletion_table = str.maketrans('', '', string.punctuation)
    return phrase.translate(deletion_table)

In [268]:
stop = set(stopwords.words('english'))

INCLUDE_STOP = True
def synonyms(word, topn=50, limit=20):
    """Return up to `limit` pronounceable replacement candidates for `word`.

    Candidates are `word` itself followed by the lower-cased nearest
    neighbours of `word.lower()` in the embedding `model`, in similarity
    order. Entries with no pronunciation in the `pronouncing` dictionary are
    dropped, since their meter cannot be checked.

    Parameters:
        word: the word to find replacements for.
        topn: how many neighbours to request from the model.
        limit: maximum number of candidates to return.

    Returns:
        A list of unique strings. When INCLUDE_STOP is false and `word` is an
        English stop word, the list is just `[word]`.
    """
    # The stop-word list is lower-case, so compare case-insensitively.
    if not INCLUDE_STOP and word.lower() in stop:
        return [word]

    neighbours = model.most_similar(positive=[word.lower()], topn=topn)
    candidates = [word] + [neighbour[0].lower() for neighbour in neighbours]
    pronounceable = [candidate for candidate in candidates
                     if pronouncing.phones_for_word(candidate)]

    # De-duplicate while keeping similarity order. Going through set() would
    # make the truncation below keep an arbitrary, hash-order-dependent subset
    # that changes between kernel runs and may drop the original word.
    return list(dict.fromkeys(pronounceable))[:limit]

# Smoke test. Embedding neighbours are not strictly synonyms: the recorded output
# below also contains antonyms such as 'fast' and 'rapid'.
synonyms('slow')

  if np.issubdtype(vec.dtype, np.int):


['slow',
 'slowing',
 'sluggish',
 'rapid',
 'slowed',
 'slower',
 'fast',
 'slows',
 'steady',
 'slowness',
 'plodding',
 'slowest']

In [109]:
def get_meter(line):
    """Count syllables and collect per-word stress patterns for `line`.

    Punctuation is stripped, the line is tokenised, and the first
    pronunciation listed by `pronouncing` is used for each token.

    Returns:
        (syllables, stresses): the total syllable count, and one list of
        integer stress digits per token.

    Raises:
        IndexError: when a token has no pronunciation (the lookup returns an
        empty list). Callers in this notebook rely on this to skip such lines.
    """
    stresses = []
    syllables = 0

    for token in word_tokenize(remove_punctuation(line)):
        first_pronunciation = pronouncing.phones_for_word(token)[0]
        syllables += pronouncing.syllable_count(first_pronunciation)
        stresses.append([int(digit) for digit in pronouncing.stresses(first_pronunciation)])

    return syllables, stresses

In [110]:
# Example: returns (total syllables, one list of stress digits per word).
get_meter('that was amazing')

(5, [[1], [1], [0, 1, 0]])

In [355]:
def check_meter(line, meter, partial=False, start=0, n_syllables=None):
    """Check whether `line` scans according to `meter`.

    Parameters:
        line: text to scan. Punctuation is stripped by get_meter.
        meter: one cycle of the meter as a sequence of booleans, False for an
            unstressed position and True for a stressed one. It repeats
            cyclically along the line.
        partial: when True, the line may stop mid-cycle. When False, it must
            end exactly on a cycle boundary.
        start: position within the meter at which the line begins.
        n_syllables: if given (truthy), the line must have exactly this many
            syllables.

    Returns:
        True if every constrained syllable matches the meter (and the length
        conditions hold), otherwise False.

    Raises:
        IndexError: propagated from get_meter when a word has no known
        pronunciation.
    """
    cycle_len = len(meter)
    meter_step = start

    syllables, stresses = get_meter(line)

    # Compare by value; `is not` on ints tests object identity and is only
    # accidentally right for small cached integers.
    if n_syllables and syllables != n_syllables:
        return False

    for word_stresses in stresses:
        if len(word_stresses) == 1:
            # One-syllable words can be read as stressed or unstressed.
            meter_step += 1
            continue

        for syllable in word_stresses:
            # Stress 2 (lightly stressed) fits either position. Otherwise the
            # syllable must agree with the current meter position.
            if syllable != 2 and (syllable == 1) != meter[meter_step % cycle_len]:
                return False
            meter_step += 1

    # A complete line must end at the correct step, i.e. on a cycle boundary.
    return partial or meter_step % cycle_len == 0

In [331]:
# One cycle of each meter: True marks a stressed position, False an unstressed one.
iambic = [False, True]               # da-DUM
trochaic = [True, False]             # DUM-da
dactylic = [True, False, False]      # DUM-da-da
# Five dactyls followed by a closing trochee, built as a fresh list.
dactylic_hexameter = dactylic * 5 + trochaic

In [124]:
# Sanity checks: each call prints whether the line scans in the given meter.
# The second line is a non-iambic control; the recorded output for it is False.
print(check_meter('How do I love thee? Let me count the ways.', iambic))
print(check_meter('Is this smart enough to catch bad meter?', iambic))
print(check_meter('Once upon a midnight dreary, while I pondered, weak and weary', trochaic))
print(check_meter('This is the forest primeval, the murmuring pines and the hemlock', dactylic_hexameter))

True
False
True
True


In [86]:
test_poem = \
'''How do I love thee? Let me count the ways.
I love thee to the depth and breadth and height
My soul can reach, when feeling out of sight
For the ends of being and ideal grace.
I love thee to the level of every day's
Most quiet need, by sun and candle-light.
I love thee freely, as men strive for right.
I love thee purely, as they turn from praise.
I love thee with the passion put to use
In my old griefs, and with my childhood's faith.
I love thee with a love I seemed to lose
With my lost saints. I love thee with the breath,
Smiles, tears, of all my life; and, if God choose,
I shall but love thee better after death.
'''.split('\n')
# Print every line that does NOT scan as iambic.
for line in test_poem:
    try:
        if not check_meter(line, iambic):
            print(line)
    except IndexError:
        # get_meter raises IndexError for a word with no known pronunciation;
        # such lines cannot be scanned and are skipped. Any other exception
        # (including KeyboardInterrupt) is allowed to surface.
        continue

For the ends of being and ideal grace.
I love thee to the level of every day's


In [11]:
# WIP
def make_chunk_iambic(words):
    """Return the first rewording of `words` that scans as iambic, or None.

    Each word may be replaced by any candidate from synonyms(). Combinations
    are tried in itertools.product order and the first one accepted by
    check_meter is returned as a space-joined string.

    Parameters:
        words: sequence of words making up the chunk.

    Returns:
        The first matching line, or None when no combination fits.
    """
    # The original called check_iambic, which is not defined in this notebook,
    # and a bare `except` hid the NameError so nothing was ever returned.
    iambic_meter = [False, True]
    word_synonyms = list(map(synonyms, words))

    # https://stackoverflow.com/questions/32074543/how-to-get-the-length-of-an-itertools-product
    n_possible = functools.reduce(operator.mul, map(len, word_synonyms), 1)
    for possible in tqdm(product(*word_synonyms), total=n_possible):
        potential = ' '.join(possible)
        try:
            if check_meter(potential, iambic_meter):
                return potential
        except IndexError:
            # A token has no known pronunciation; try the next combination.
            continue

    return None

In [None]:
# https://stackoverflow.com/questions/32074543/how-to-get-the-length-of-an-itertools-product
def product_length(items):
    """Return how many tuples itertools.product(*items) would yield.

    Parameters:
        items: an iterable of sized collections.

    Returns:
        The product of the collections' lengths (1 for no collections).
    """
    # Use the argument; the original read the global `word_synonyms` instead.
    return functools.reduce(operator.mul, map(len, items), 1)

def make_fit_meter(line, meter, return_early=10):
    """Search for rewordings of `line` that scan according to `meter`.

    The longest leading run of words that already fits the meter (checked
    with partial=True) is kept as is. Every later word may be replaced by any
    candidate from synonyms(). Each combination that passes a full
    check_meter is printed and collected.

    Parameters:
        line: the text to reword; it is split on single spaces.
        meter: one cycle of the meter, as accepted by check_meter.
        return_early: stop once this many fitting lines are found. A falsy
            value searches every combination.

    Returns:
        A list of the fitting lines found (possibly empty).
    """
    words = line.split(' ') # word_tokenize(line)

    # Find how many leading words already fit the meter.
    n_fixed = len(words)
    for i in range(len(words)):
        try:
            prefix_fits = check_meter(' '.join(words[:i + 1]), meter, partial=True)
        except IndexError:
            # A word with no known pronunciation cannot be scanned, so treat
            # it as the first word needing replacement.
            prefix_fits = False
        if not prefix_fits:
            n_fixed = i
            break

    fixed, replaceable = words[:n_fixed], words[n_fixed:]
    word_synonyms = [[word] for word in fixed] + [synonyms(word) for word in replaceable]

    # Keep results apart from the prefix words. The original appended fitted
    # lines to the prefix list, so the return value mixed single words with
    # whole lines and return_early counted the prefix words too.
    fitted = []
    for possible in tqdm(product(*word_synonyms), total=product_length(word_synonyms)):
        potential = ' '.join(possible)
        try:
            fits = check_meter(potential, meter)
        except IndexError:
            continue
        if not fits:
            continue

        fitted.append(potential)
        print(potential)

        if return_early and len(fitted) >= return_early:
            return fitted

    return fitted

In [356]:
# With INCLUDE_STOP true, synonyms() looks up replacements for stop words as well
# instead of returning them unchanged.
INCLUDE_STOP = True
# Collect iambic rewordings of the sample line (up to make_fit_meter's default return_early).
to_iambic = make_fit_meter("The quick brown fox jumped over", iambic)

In [None]:
# Tokenise the whole corpus file. The context manager closes the file handle,
# which the bare open(...).read() left to the garbage collector.
with open('sermon_mount_NIV.txt') as corpus_file:
    corpus = word_tokenize(corpus_file.read())

In [357]:
# NOTE(review): the recorded output below ([False, True]) does not look like a
# make_fit_meter result, which is a list of strings. It is probably stale; re-run to confirm.
to_iambic

[False, True]