# News to poems

In [20]:
import nltk
from pprint import pprint
from nltk.parse.stanford import StanfordDependencyParser

In [2]:
partial_grammar = """
S -> NP VP
PP -> P NP
NP -> Det N | Det N PP | 'I'
VP -> V NP | VP PP
"""
title = "British scientist says memories of spending the night with Marilyn Monroe could be implanted into the brain"
title_words = nltk.word_tokenize(title)
title_pos = nltk.pos_tag(title_words)
print title_pos

[('British', 'JJ'), ('scientist', 'NN'), ('says', 'VBZ'), ('memories', 'NNS'), ('of', 'IN'), ('spending', 'VBG'), ('the', 'DT'), ('night', 'NN'), ('with', 'IN'), ('Marilyn', 'NNP'), ('Monroe', 'NNP'), ('could', 'MD'), ('be', 'VB'), ('implanted', 'VBN'), ('into', 'IN'), ('the', 'DT'), ('brain', 'NN')]


In [3]:
def lex_grammar(words_pos):
    lexicon = {}
    for word, pos in sorted(words_pos, key=lambda (a, b): b):
        if pos not in lexicon:
            lexicon[pos] = []
        lexicon[pos].append(word)
    return "\n".join("{pos} -> {words}".format(pos=pos, words="|".join(ws)) for pos, ws in lexicon.iteritems())

In [5]:
second_part_of_grammar = lex_grammar(title_pos)
grammar = partial_grammar + second_part_of_grammar

In [6]:
print grammar


S -> NP VP
PP -> P NP
NP -> Det N | Det N PP | 'I'
VP -> V NP | VP PP
MD -> could
VB -> be
VBG -> spending
NN -> scientist|night|brain
VBN -> implanted
JJ -> British
IN -> of|with|into
VBZ -> says
DT -> the|the
NNS -> memories
NNP -> Marilyn|Monroe


Before proceeding you should download zip-archive with stanford parser:

http://nlp.stanford.edu/software/stanford-parser-full-2015-04-20.zip

We need two files from there:
- stanford/stanford-parser.jar
- stanford/stanford-parser-3.5.2-models.jar

Create a directory 'stanford' and put them there.

In [24]:
parser = StanfordDependencyParser(path_to_jar='stanford/stanford-parser.jar',\
                                  path_to_models_jar='stanford/stanford-parser-3.5.2-models.jar')
parsed_tree = list(parser.parse(title_words))

print title
pprint(list(parsed_tree[0].triples()))
    

British scientist says memories of spending the night with Marilyn Monroe could be implanted into the brain
[((u'says', u'VBZ'), u'nsubj', (u'scientist', u'NN')),
 ((u'scientist', u'NN'), u'amod', (u'British', u'JJ')),
 ((u'says', u'VBZ'), u'dobj', (u'memories', u'NNS')),
 ((u'memories', u'NNS'), u'nmod', (u'spending', u'NN')),
 ((u'spending', u'NN'), u'case', (u'of', u'IN')),
 ((u'says', u'VBZ'), u'nmod:tmod', (u'night', u'NN')),
 ((u'night', u'NN'), u'det', (u'the', u'DT')),
 ((u'says', u'VBZ'), u'advcl', (u'implanted', u'VBN')),
 ((u'implanted', u'VBN'), u'mark', (u'with', u'IN')),
 ((u'implanted', u'VBN'), u'nsubjpass', (u'Monroe', u'NNP')),
 ((u'Monroe', u'NNP'), u'compound', (u'Marilyn', u'NNP')),
 ((u'implanted', u'VBN'), u'aux', (u'could', u'MD')),
 ((u'implanted', u'VBN'), u'auxpass', (u'be', u'VB')),
 ((u'implanted', u'VBN'), u'nmod', (u'brain', u'NN')),
 ((u'brain', u'NN'), u'case', (u'into', u'IN')),
 ((u'brain', u'NN'), u'det', (u'the', u'DT'))]


In [26]:
test_sent = "A ball is put into the box"
pprint(list(list(parser.parse(test_sent.split(' ')))[0].triples()))

[((u'put', u'VBN'), u'nsubjpass', (u'ball', u'NN')),
 ((u'ball', u'NN'), u'det', (u'A', u'DT')),
 ((u'put', u'VBN'), u'auxpass', (u'is', u'VBZ')),
 ((u'put', u'VBN'), u'nmod', (u'box', u'NN')),
 ((u'box', u'NN'), u'case', (u'into', u'IN')),
 ((u'box', u'NN'), u'det', (u'the', u'DT'))]
