# News to poems

In [1]:
import nltk
from pprint import pprint
from nltk.parse.stanford import StanfordDependencyParser

In [2]:
partial_grammar = """
S -> NP VP
PP -> P NP
NP -> Det N | Det N PP | 'I'
VP -> V NP | VP PP
"""
title = "British scientist says memories of spending the night with Marilyn Monroe could be implanted into the brain"
title_words = nltk.word_tokenize(title)
title_pos = nltk.pos_tag(title_words)
print title_pos

[('British', 'JJ'), ('scientist', 'NN'), ('says', 'VBZ'), ('memories', 'NNS'), ('of', 'IN'), ('spending', 'VBG'), ('the', 'DT'), ('night', 'NN'), ('with', 'IN'), ('Marilyn', 'NNP'), ('Monroe', 'NNP'), ('could', 'MD'), ('be', 'VB'), ('implanted', 'VBN'), ('into', 'IN'), ('the', 'DT'), ('brain', 'NN')]


In [3]:
def lex_grammar(words_pos):
    lexicon = {}
    for word, pos in sorted(words_pos, key=lambda (a, b): b):
        if pos not in lexicon:
            lexicon[pos] = []
        lexicon[pos].append(word)
    return "\n".join("{pos} -> {words}".format(pos=pos, words="|".join(ws)) for pos, ws in lexicon.iteritems())

In [4]:
second_part_of_grammar = lex_grammar(title_pos)
grammar = partial_grammar + second_part_of_grammar

In [5]:
print grammar


S -> NP VP
PP -> P NP
NP -> Det N | Det N PP | 'I'
VP -> V NP | VP PP
MD -> could
VB -> be
VBG -> spending
NN -> scientist|night|brain
VBN -> implanted
JJ -> British
IN -> of|with|into
VBZ -> says
DT -> the|the
NNS -> memories
NNP -> Marilyn|Monroe


Before proceeding you should download zip-archive with stanford parser:

http://nlp.stanford.edu/software/stanford-parser-full-2015-04-20.zip

We need two files from there:
- stanford/stanford-parser.jar
- stanford/stanford-parser-3.5.2-models.jar

Create a directory 'stanford' and put them there.

In [6]:
parser = StanfordDependencyParser(path_to_jar='stanford/stanford-parser.jar',\
                                  path_to_models_jar='stanford/stanford-parser-3.5.2-models.jar')
parsed_tree = list(parser.parse(title_words))

print title
pprint(list(parsed_tree[0].triples()))
    

British scientist says memories of spending the night with Marilyn Monroe could be implanted into the brain
[((u'says', u'VBZ'), u'nsubj', (u'scientist', u'NN')),
 ((u'scientist', u'NN'), u'amod', (u'British', u'JJ')),
 ((u'says', u'VBZ'), u'dobj', (u'memories', u'NNS')),
 ((u'memories', u'NNS'), u'nmod', (u'spending', u'NN')),
 ((u'spending', u'NN'), u'case', (u'of', u'IN')),
 ((u'says', u'VBZ'), u'nmod:tmod', (u'night', u'NN')),
 ((u'night', u'NN'), u'det', (u'the', u'DT')),
 ((u'says', u'VBZ'), u'advcl', (u'implanted', u'VBN')),
 ((u'implanted', u'VBN'), u'mark', (u'with', u'IN')),
 ((u'implanted', u'VBN'), u'nsubjpass', (u'Monroe', u'NNP')),
 ((u'Monroe', u'NNP'), u'compound', (u'Marilyn', u'NNP')),
 ((u'implanted', u'VBN'), u'aux', (u'could', u'MD')),
 ((u'implanted', u'VBN'), u'auxpass', (u'be', u'VB')),
 ((u'implanted', u'VBN'), u'nmod', (u'brain', u'NN')),
 ((u'brain', u'NN'), u'case', (u'into', u'IN')),
 ((u'brain', u'NN'), u'det', (u'the', u'DT'))]


In [7]:
test_sent = "A ball is put into the box"
pprint(list(list(parser.parse(test_sent.split(' ')))[0].triples()))

[((u'put', u'VBN'), u'nsubjpass', (u'ball', u'NN')),
 ((u'ball', u'NN'), u'det', (u'A', u'DT')),
 ((u'put', u'VBN'), u'auxpass', (u'is', u'VBZ')),
 ((u'put', u'VBN'), u'nmod', (u'box', u'NN')),
 ((u'box', u'NN'), u'case', (u'into', u'IN')),
 ((u'box', u'NN'), u'det', (u'the', u'DT'))]


## Morphological analysis

In [8]:
import re
def tokenize(s, pattern=r'\W+'):
    return filter(None, re.split(pattern, s))

In [36]:
from nltk.corpus import cmudict
from itertools import izip_longest, chain
d = cmudict.dict()

poem_file = open('data/guinea_pig.txt')
corpus = poem_file.readlines()
poem_file.close()

poem_structure_1 = [
    (8, [1,3,5,7],),
    (8, [1,3,5,7],),
    (8, [1,3,5,7],),
    (8, [1,3,5,7],),
]
# for line in corpus:
#     for token in tokenize(line):
#         print token, d.get(token.lower())

In [45]:
def num_of_syllables(phonetic_translation):
    number_of_syllables = 0
    for el in phonetic_translation:
        if el[-1].isdigit() == True:
            number_of_syllables += 1
        else:
            pass
    return number_of_syllables
        
def is_stressed(phoneme):
    if phoneme[-1] == '1':
        return True
    else:
        return False

def stress_mask(phonemes):
    return [is_stressed(ph) for ph in phonemes if is_vowel(ph)]

def combinations(*args):
    if len(args) == 1:
        return [[x] for x in args[0]]
    combs = []
    for item in args[0]:
        for x in combinations(*args[1:]):
            combs.append([item] + x)
    return combs


# helper function of validate_couplet(x,y)
def validate_line(line, structure):
    line_phonemes = [] 
    words = line.split(' ')
    for word in words:
        word_phonemes = d.get(word)
        line_phonemes.append(word_phonemes)
    for candidate in combinations(*line_phonemes):
        stresses = stress_mask(chain.from_iterable(candidate))
        print candidate, stresses
    
        


#returns true or false; does a couplet answer to the above mentioned scheme or not?
def validate_couplet(couplet, structure):
    pass

def is_vowel(sound):
    return sound[-1].isdigit()

def is_rhyme(pron1, pron2):
    vowels = [(i, sound) for i, sound in enumerate(pron1) if is_vowel(sound)]
    if vowels:
        idx, last_vowel = vowels[-1]
        return pron1[-(idx + 1):] == pron2[-(idx + 1):]
    else:
        return False
# #test
# for pron1 in d.get('was'):
#     for pron2 in d.get('because'):
#         print pron1, pron2, is_rhyme(pron1, pron2)
combinations([1,2,3],[4],[5,6],[7,8,9])
validate_line("there was a little guinea pig", [1,3,5,7])
print
validate_line("he always walked upon his feet", None)
print
validate_line("and while he run as i am told", None)

[[u'DH', u'EH1', u'R'], [u'W', u'AA1', u'Z'], [u'AH0'], [u'L', u'IH1', u'T', u'AH0', u'L'], [u'G', u'IH1', u'N', u'IY0'], [u'P', u'IH1', u'G']] [True, True, False, True, False, True, False, True]
[[u'DH', u'EH1', u'R'], [u'W', u'AA1', u'Z'], [u'EY1'], [u'L', u'IH1', u'T', u'AH0', u'L'], [u'G', u'IH1', u'N', u'IY0'], [u'P', u'IH1', u'G']] [True, True, True, True, False, True, False, True]
[[u'DH', u'EH1', u'R'], [u'W', u'AH1', u'Z'], [u'AH0'], [u'L', u'IH1', u'T', u'AH0', u'L'], [u'G', u'IH1', u'N', u'IY0'], [u'P', u'IH1', u'G']] [True, True, False, True, False, True, False, True]
[[u'DH', u'EH1', u'R'], [u'W', u'AH1', u'Z'], [u'EY1'], [u'L', u'IH1', u'T', u'AH0', u'L'], [u'G', u'IH1', u'N', u'IY0'], [u'P', u'IH1', u'G']] [True, True, True, True, False, True, False, True]
[[u'DH', u'EH1', u'R'], [u'W', u'AH0', u'Z'], [u'AH0'], [u'L', u'IH1', u'T', u'AH0', u'L'], [u'G', u'IH1', u'N', u'IY0'], [u'P', u'IH1', u'G']] [True, False, False, True, False, True, False, True]
[[u'DH', u'EH1', u'R'

In [40]:
d.get("there was")