In [2]:
from nltk.parse.stanford import StanfordDependencyParser

import argparse
import sys
import re

In [4]:
# Locations of the Stanford CoreNLP 3.7.0 jars, relative to the notebook's working directory.
path_to_jar = '../stanford-corenlp-full-2016-10-31/stanford-corenlp-3.7.0.jar'
path_to_models_jar = '../stanford-corenlp-full-2016-10-31/stanford-corenlp-3.7.0-models.jar'
# NLTK wrapper around the Stanford jars; dependency_parse() calls its raw_parse().
dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
# Matches three runs of 3-100 digits separated by one or more spaces and/or hyphens.
# NOTE(review): not referenced in the cells visible here -- confirm it is still needed.
r = re.compile(r'\d{3,100}[ -]+\d{3,100}[ -]+\d{3,100}')

In [5]:
def dependency_parse(sentence):
    """Parse one sentence with the module-level ``dependency_parser``.

    Args:
        sentence: Raw sentence text, passed unchanged to
            ``dependency_parser.raw_parse``.

    Returns:
        A list of the triples of the first parse yielded by ``raw_parse``,
        e.g. ``(('domain', 'NN'), 'det', ('a', 'DT'))``.

    Raises:
        SystemExit: With status 1 if parsing fails; the offending sentence
            and the underlying error are written to stderr first.
    """
    try:
        parses = dependency_parser.raw_parse(sentence)
        # Only the first parse produced for the sentence is used.
        first_parse = next(parses)
        return list(first_parse.triples())
    except Exception as err:
        # Catch Exception (not a bare except) so Ctrl-C / SystemExit are not
        # swallowed, and report what failed instead of an empty message.
        print(
            "EXCEPTION: while parsing sentence: {!r}: {!r}".format(sentence, err),
            file=sys.stderr,
        )
        # Non-zero status: exiting with 0 would signal success to the caller.
        sys.exit(1)

In [6]:
# Smoke-test the NLTK wrapper on one sentence; the cell output is the list of triples it returns.
dependency_parse("ARFTS specially binds to a distinct domain in XIAP-BIR3")

[(('ARFTS', 'NNS'), 'amod', ('binds', 'JJ')),
 (('binds', 'JJ'), 'advmod', ('specially', 'RB')),
 (('ARFTS', 'NNS'), 'nmod', ('domain', 'NN')),
 (('domain', 'NN'), 'case', ('to', 'TO')),
 (('domain', 'NN'), 'det', ('a', 'DT')),
 (('domain', 'NN'), 'amod', ('distinct', 'JJ')),
 (('domain', 'NN'), 'nmod', ('XIAP-BIR3', 'NN')),
 (('XIAP-BIR3', 'NN'), 'case', ('in', 'IN'))]

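The above output is wrong: `binds` is tagged as an adjective (`JJ`) modifying `ARFTS` instead of being parsed as the verb. You need to fix it! Compare it with the output from the Stanford CoreNLP demo.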

__Caution:__ Do not use the parser from NLTK!

In [7]:
import json
import os
from pprint import pprint
from pycorenlp import StanfordCoreNLP

In [11]:
# Client for a CoreNLP server that must already be running. The address can be
# overridden with the CORENLP_URL environment variable (e.g. to use another
# port); when it is unset the original default, http://localhost:9000, is used.
nlp = StanfordCoreNLP(os.environ.get('CORENLP_URL', 'http://localhost:9000'))
# Request the `depparse` annotator and ask the server to reply in JSON.
properties = {'annotators': 'depparse', 'outputFormat': 'json'}

def parse(fname):
    """Dependency-parse a text file line by line through the CoreNLP server.

    Every non-blank line of ``fname`` is stripped and sent to ``nlp.annotate``
    with the module-level ``properties``. Each sentence in the reply is written
    as one JSON object per line to ``fname + '.deps.json'``. A line that the
    server splits into several sentences therefore produces several output
    lines, rather than silently keeping only the first sentence.

    Args:
        fname: Path of the text file to parse.

    Raises:
        RuntimeError: If the server reply for a line is not a parsed JSON
            object (for example, an error message returned as plain text).
    """
    out_name = fname + '.deps.json'
    with open(fname, 'r') as fhandle, open(out_name, 'w') as whandle:
        for lineno, line in enumerate(fhandle, start=1):
            text = line.strip()
            if not text:
                # Nothing to parse; avoids a pointless request and an empty
                # 'sentences' list.
                continue
            output = nlp.annotate(text, properties)
            if not isinstance(output, dict):
                # A non-dict reply means the response was not decoded as JSON;
                # fail with the reply text instead of an obscure TypeError.
                raise RuntimeError(
                    "CoreNLP returned a non-JSON response for {}:{}: {!r}".format(
                        fname, lineno, output
                    )
                )
            for sentence in output['sentences']:
                whandle.write(json.dumps(sentence) + "\n")

Let us test the dependencies on a sample file:

In [12]:
%cat ../sampleFile.txt

Royal likes Mangoes
Royal wants to go back to Mangalore
Royal wants to spend the rest of his life reading books and learning new languages.
at times , the suspense is palpable , but by the end there 's a sense that the crux of the mystery hinges on a technicality that strains credulity and leaves the viewer haunted by the waste of potential .

In [13]:
# Parse the sample file; the results are written to '../sampleFile.txt.deps.json'.
parse('../sampleFile.txt')

### Dependency Details
The above way of parsing gives three levels of dependency parsing:
- basicDependencies,
- enhancedDependencies, and
- enhancedPlusPlusDependencies

For now, I am saving all the information that I get from dependency parsing. I will later explore what each of these levels means.

In [24]:
# Read the parser output back (one JSON document per line) and pretty-print each record.
with open('../sampleFile.txt.deps.json') as json_file:
    for line in json_file:
        a = json.loads(line)
        pprint(a)
# Disabled alternative: print only the 'enhancedPlusPlusDependencies' entries of each record.
#         for x in a['enhancedPlusPlusDependencies']:
#             pprint(x)

{'basicDependencies': [{'dep': 'ROOT',
                        'dependent': 2,
                        'dependentGloss': 'likes',
                        'governor': 0,
                        'governorGloss': 'ROOT'},
                       {'dep': 'nsubj',
                        'dependent': 1,
                        'dependentGloss': 'Royal',
                        'governor': 2,
                        'governorGloss': 'likes'},
                       {'dep': 'dobj',
                        'dependent': 3,
                        'dependentGloss': 'Mangoes',
                        'governor': 2,
                        'governorGloss': 'likes'}],
 'enhancedDependencies': [{'dep': 'ROOT',
                           'dependent': 2,
                           'dependentGloss': 'likes',
                           'governor': 0,
                           'governorGloss': 'ROOT'},
                          {'dep': 'nsubj',
                           'dependent': 1,
                 

             'characterOffsetBegin': 56,
             'characterOffsetEnd': 59,
             'index': 12,
             'originalText': 'and',
             'pos': 'CC',
             'word': 'and'},
            {'after': ' ',
             'before': ' ',
             'characterOffsetBegin': 60,
             'characterOffsetEnd': 68,
             'index': 13,
             'originalText': 'learning',
             'pos': 'VBG',
             'word': 'learning'},
            {'after': ' ',
             'before': ' ',
             'characterOffsetBegin': 69,
             'characterOffsetEnd': 72,
             'index': 14,
             'originalText': 'new',
             'pos': 'JJ',
             'word': 'new'},
            {'after': '',
             'before': ' ',
             'characterOffsetBegin': 73,
             'characterOffsetEnd': 82,
             'index': 15,
             'originalText': 'languages',
             'pos': 'NNS',
             'word': 'languages'},
            {'after': ''

                                   'governorGloss': 'palpable'},
                                  {'dep': 'cc',
                                   'dependent': 9,
                                   'dependentGloss': 'but',
                                   'governor': 7,
                                   'governorGloss': 'palpable'},
                                  {'dep': 'case',
                                   'dependent': 10,
                                   'dependentGloss': 'by',
                                   'governor': 12,
                                   'governorGloss': 'end'},
                                  {'dep': 'det',
                                   'dependent': 11,
                                   'dependentGloss': 'the',
                                   'governor': 12,
                                   'governorGloss': 'end'},
                                  {'dep': 'nmod:by',
                                   'dependent': 12,
            

In [341]:
%cat sampleFile.txt.deps.json

{"index": 0, "basicDependencies": [{"dep": "ROOT", "governor": 0, "governorGloss": "ROOT", "dependent": 2, "dependentGloss": "likes"}, {"dep": "nsubj", "governor": 2, "governorGloss": "likes", "dependent": 1, "dependentGloss": "Royal"}, {"dep": "dobj", "governor": 2, "governorGloss": "likes", "dependent": 3, "dependentGloss": "Mangoes"}], "enhancedDependencies": [{"dep": "ROOT", "governor": 0, "governorGloss": "ROOT", "dependent": 2, "dependentGloss": "likes"}, {"dep": "nsubj", "governor": 2, "governorGloss": "likes", "dependent": 1, "dependentGloss": "Royal"}, {"dep": "dobj", "governor": 2, "governorGloss": "likes", "dependent": 3, "dependentGloss": "Mangoes"}], "enhancedPlusPlusDependencies": [{"dep": "ROOT", "governor": 0, "governorGloss": "ROOT", "dependent": 2, "dependentGloss": "likes"}, {"dep": "nsubj", "governor": 2, "governorGloss": "likes", "dependent": 1, "dependentGloss": "Royal"}, {"dep": "dobj", "governor": 2, "governorGloss": "likes", "dependent": 3, "dependentGloss": "M

### To Do:
- include arguments to specify port number
- readme instruction to run the server first
- write one json per line and process it. The way you are doing it now is janky.