In [1]:
# https://stanfordnlp.github.io/stanfordnlp/corenlp_client.html
# https://stackoverflow.com/questions/10401076/difference-between-constituency-parser-and-dependency-parser

In [2]:
#!/usr/bin/env python3

from nltk.parse import CoreNLPParser
from nltk.parse.corenlp import CoreNLPDependencyParser
from IPython.display import display
from pprint import pprint

parser = CoreNLPParser(url='http://188.166.145.126:9000')
dep_parser = CoreNLPDependencyParser(url='http://188.166.145.126:9000')

text = ['What is the airspeed of an unladen swallow?']
text = ['I want to drink wine for dinner']

#####################
# dependency parser #
#####################

"""OPTION1 parse()
http://www.nltk.org/api/nltk.parse.html#nltk.parse.corenlp.CoreNLPDependencyParser
#Return type:iter(Tree)
"""
res = dep_parser.parse(text) # if sent not in a lit use .split()


"""OPTION 2 raw_parse(): 
http://www.nltk.org/api/nltk.parse.html#nltk.parse.corenlp.GenericCoreNLPParser.raw_parse
Takes a sentence as a string; 
before parsing, it will be automatically tokenized 
and tagged by the CoreNLP Parser.
Return type:iter(Tree)
"""
res = dep_parser.raw_parse(text[0])

# accessing the dependencies
dependencies = next(res)


"""dependencies.tree()
"""
print('dependency tree:', list(dependencies.tree()))

"""dependencies.to_connl()
"""
# print('\n', dependencies.to_conll(style=10))

"""dependencies.triples()
Extract dependency triples of the form: ((head word, head tag), rel, (dep word, dep tag))
triples can be indexed
"""
for t in dependencies.triples():
    print(t)
#     print(t, t[0], t[0][0]) 

# dependencies are stored in a Dict
pprint(dependencies.root)
print("Head Word:", dependencies.root["word"])


next(
parser.raw_parse(text[0])
).pretty_print() 

dependency tree: ['I', Tree('drink', ['to', 'wine', Tree('dinner', ['for'])])]
(('want', 'VBP'), 'nsubj', ('I', 'PRP'))
(('want', 'VBP'), 'xcomp', ('drink', 'VB'))
(('drink', 'VB'), 'mark', ('to', 'TO'))
(('drink', 'VB'), 'dobj', ('wine', 'NN'))
(('drink', 'VB'), 'nmod', ('dinner', 'NN'))
(('dinner', 'NN'), 'case', ('for', 'IN'))
{'address': 2,
 'ctag': 'VBP',
 'deps': defaultdict(<class 'list'>, {'nsubj': [1], 'xcomp': [4]}),
 'feats': '_',
 'head': 0,
 'lemma': 'want',
 'rel': 'ROOT',
 'tag': 'VBP',
 'word': 'want'}
Head Word: want
    ROOT                                  
     |                                     
     S                                    
  ___|_________                            
 |             VP                         
 |    _________|____                       
 |   |              S                     
 |   |              |                      
 |   |              VP                    
 |   |     _________|____                  
 |   |    |              

In [6]:
# from edbullen
import re

parser = CoreNLPParser(url='http://188.166.145.126:9000', tagtype='pos')
dep_parser = CoreNLPDependencyParser(url='http://188.166.145.126:9000')


# A random selection of sentences with different styles, domains etc
sentences = [
             "I want to drink wine for dinner",
             "He watched the dark eyeslits narrowing with greed till her eyes were green stones",
             "When will the Oracle 12.2 database be released?",
             "Coherence is an in-memory grid cluster for Java code",
             "Oracle 12.2 will be released in March 2017",
             "PyData community gathers to discuss how best to apply languages and tools to continuously evolving challenges in data management, processing, analytics, and visualization.",
             "Arsenal are a football team in North London",
             "When will Arsenal ever win a match?"
            ]
regexpSubj = re.compile(r'subj')
regexpObj = re.compile(r'obj')
regexNouns = re.compile("^N.*|^PR.*")

# def get_compounds(triples, word):
#     compound = []
#     for t in triples:
#         print(t)
#         if t[0][0] == word:
#             if regexNouns.search(t[2][1]):
#                 compound.append(t[2][0])
#     return compound

for sentence in sentences:
    print()
    nouns = []
    tokens = sentence.split()
    print('tokens: ', tokens)
    tags = parser.tag(tokens)
    print(tags)
    for tag_pair in tags:
        if tag_pair[1]=='NN':
            nouns.append(tag_pair[0])
            
            
    result = dep_parser.raw_parse(sentence)
    dep = next(result)
    root = [dep.root["word"]]
#     root.append(get_compounds(dep.triples(), root))
#     print(root)
    
    subj, obj = [], []
    for t in dep.triples():
#         if regexpNouns.search(t[1]):
#             subj.append(t[2][0])
        if regexpSubj.search(t[1]):
            subj.append(t[2][0])
#             subj.append(get_compounds(dep.triples(),t[2][0]))
        if regexpObj.search(t[1]):
            obj.append(t[2][0])
#             obj.append(get_compounds(dep.triples(),t[2][0]))
    print("\n",sentence)
    print("Subject:",subj, "\nTopic:", root, "\nObject:",obj)
    print(nouns)


tokens:  ['I', 'want', 'to', 'drink', 'wine', 'for', 'dinner']
[('I', 'PRP'), ('want', 'VBP'), ('to', 'TO'), ('drink', 'VB'), ('wine', 'NN'), ('for', 'IN'), ('dinner', 'NN')]

 I want to drink wine for dinner
Subject: ['I'] 
Topic: ['want'] 
Object: ['wine']
['wine', 'dinner']

tokens:  ['He', 'watched', 'the', 'dark', 'eyeslits', 'narrowing', 'with', 'greed', 'till', 'her', 'eyes', 'were', 'green', 'stones']
[('He', 'PRP'), ('watched', 'VBD'), ('the', 'DT'), ('dark', 'JJ'), ('eyeslits', 'NNS'), ('narrowing', 'VBG'), ('with', 'IN'), ('greed', 'NN'), ('till', 'IN'), ('her', 'PRP$'), ('eyes', 'NNS'), ('were', 'VBD'), ('green', 'JJ'), ('stones', 'NNS')]

 He watched the dark eyeslits narrowing with greed till her eyes were green stones
Subject: ['He', 'eyeslits'] 
Topic: ['watched'] 
Object: []
['greed']

tokens:  ['When', 'will', 'the', 'Oracle', '12.2', 'database', 'be', 'released?']
[('When', 'WRB'), ('will', 'MD'), ('the', 'DT'), ('Oracle', 'NNP'), ('12.2', 'CD'), ('database', 'NN'),