In [None]:
from nltk.parse.stanford import StanfordParser

In [15]:
from stanfordcorenlp import StanfordCoreNLP
import json
# Create the CoreNLP wrapper from a locally unpacked CoreNLP distribution.
# The distribution used here can be downloaded from
# http://nlp.stanford.edu/software/stanford-corenlp-full-2018-02-27.zip
# NOTE(review): the wrapper's README recommends calling nlp.close() once the
# notebook is finished with it -- confirm this happens in a later cell.
nlp = StanfordCoreNLP(
    r'D:\pythonProject\coreNLP\stanford-corenlp',
    memory='8g',
)

## CoreNLP Function Example



In [11]:
# Sample sentence used by the tokenizer, tagger, NER and parser demo cells.
sentence = (
    'Guangdong University of Foreign Studies '
    'is located in Guangzhou.'
)

In [12]:
# Split the sample sentence into tokens and show the resulting list.
tokens = nlp.word_tokenize(sentence)
print('Tokenize:', tokens)

Tokenize: ['Guangdong', 'University', 'of', 'Foreign', 'Studies', 'is', 'located', 'in', 'Guangzhou', '.']


In [58]:
# Tag every token of the sample sentence; the result is a list of
# (token, tag) pairs.
pos_tags = nlp.pos_tag(sentence)
print('Part of Speech:', pos_tags)


Part of Speech: [('Guangdong', 'NNP'), ('University', 'NNP'), ('of', 'IN'), ('Foreign', 'NNP'), ('Studies', 'NNPS'), ('is', 'VBZ'), ('located', 'JJ'), ('in', 'IN'), ('Guangzhou', 'NNP'), ('.', '.')]


In [14]:
# Label every token of the sample sentence with an entity class
# ('O' marks tokens that are not part of any entity).
named_entities = nlp.ner(sentence)
print('Named Entities:', named_entities)


Named Entities: [('Guangdong', 'ORGANIZATION'), ('University', 'ORGANIZATION'), ('of', 'ORGANIZATION'), ('Foreign', 'ORGANIZATION'), ('Studies', 'ORGANIZATION'), ('is', 'O'), ('located', 'O'), ('in', 'O'), ('Guangzhou', 'CITY'), ('.', 'O')]


In [15]:
# Build the constituency parse of the sample sentence; it is returned as a
# bracketed tree string.
constituency_tree = nlp.parse(sentence)
print('Constituency Parsing:', constituency_tree)


Constituency Parsing: (ROOT
  (S
    (NP
      (NP (NNP Guangdong) (NNP University))
      (PP (IN of)
        (NP (NNP Foreign) (NNPS Studies))))
    (VP (VBZ is)
      (ADJP (JJ located)
        (PP (IN in)
          (NP (NNP Guangzhou)))))
    (. .)))


In [16]:
# Build the dependency parse of the sample sentence; the result is a list of
# (relation, index, index) triples -- presumably head and dependent token
# positions, 1-based with 0 standing for the root (confirm against the
# wrapper's documentation).
dependency_arcs = nlp.dependency_parse(sentence)
print('Dependency Parsing:', dependency_arcs)

Dependency Parsing: [('ROOT', 0, 7), ('compound', 2, 1), ('nsubjpass', 7, 2), ('case', 5, 3), ('compound', 5, 4), ('nmod', 2, 5), ('auxpass', 7, 6), ('case', 9, 8), ('nmod', 7, 9), ('punct', 7, 10)]


## Coreference Example 1

In [28]:
# Three short sentences that refer to the same person by full name, by
# pronoun, by description and by surname.
text = (
    'Barack Obama was born in Hawaii. '
    'He is the president. '
    'Obama was elected in 2008.'
)

In [53]:
# Request options for nlp.annotate: run the `dcoref` annotator on English
# text and ask for a JSON response.
props = dict(
    annotators='dcoref',
    pipelineLanguage='en',
    outputFormat='json',
)

In [54]:
# Annotate `text` with the options in `props`, then decode the JSON reply.
raw_annotation = nlp.annotate(text, properties=props)
result_dict = json.loads(raw_annotation)

In [55]:
# Print every coreference chain: its id first, then one line per mention
# record belonging to that chain.
dcoref_chains = result_dict['corefs']
for chain_id in dcoref_chains:
    print('Entity:', chain_id)
    for mention in dcoref_chains[chain_id]:
        print(mention)

Entity: 1
{'id': 1, 'text': 'Barack Obama', 'type': 'PROPER', 'number': 'SINGULAR', 'gender': 'MALE', 'animacy': 'ANIMATE', 'startIndex': 1, 'endIndex': 3, 'headIndex': 2, 'sentNum': 1, 'position': [1, 1], 'isRepresentativeMention': True}
{'id': 3, 'text': 'He', 'type': 'PRONOMINAL', 'number': 'SINGULAR', 'gender': 'MALE', 'animacy': 'ANIMATE', 'startIndex': 1, 'endIndex': 2, 'headIndex': 1, 'sentNum': 2, 'position': [2, 1], 'isRepresentativeMention': False}
{'id': 4, 'text': 'the president', 'type': 'NOMINAL', 'number': 'SINGULAR', 'gender': 'MALE', 'animacy': 'ANIMATE', 'startIndex': 3, 'endIndex': 5, 'headIndex': 4, 'sentNum': 2, 'position': [2, 2], 'isRepresentativeMention': False}
{'id': 5, 'text': 'Obama', 'type': 'PROPER', 'number': 'SINGULAR', 'gender': 'MALE', 'animacy': 'ANIMATE', 'startIndex': 1, 'endIndex': 2, 'headIndex': 1, 'sentNum': 3, 'position': [3, 1], 'isRepresentativeMention': False}
Entity: 2
{'id': 2, 'text': 'Hawaii', 'type': 'PROPER', 'number': 'SINGULAR', 'gen

## Coreference Example 2

![title](img/coref.png)

In [4]:
# nlp.coref returns one list per coreference chain. Each mention in a chain
# is a tuple (sentence_index, start_index, end_index, text); indices are
# 1-based.
text = (
    "My sister has a friend called John. "
    "Really, tell me more about him? "
    "She think he is so funny!"
)
coref_tuples = nlp.coref(text)
print(coref_tuples)

[[(1, 1, 2, 'My'), (2, 4, 5, 'me')], [(1, 1, 3, 'My sister'), (3, 1, 2, 'She')], [(1, 7, 8, 'John'), (2, 7, 8, 'him'), (3, 3, 4, 'he')]]


In [8]:
# Annotate `text` with the `coref` annotator and print every chain.
#
# The language option must be spelled 'pipelineLanguage'; a misspelled key is
# not the option the server looks for, so the language would silently fall
# back to the default. 'outputFormat' is requested explicitly because the
# response is handed straight to json.loads below.
pros = {
    'annotators': 'coref',
    'pipelineLanguage': 'en',
    'outputFormat': 'json',
}
result_dict = json.loads(nlp.annotate(text, properties=pros))

# One "Entity" header per coreference chain, followed by its mention records.
for idx, mentions in result_dict['corefs'].items():
    print('Entity:', idx)
    for m in mentions:
        print(m)

Entity: 4
{'id': 1, 'text': 'My', 'type': 'PRONOMINAL', 'number': 'SINGULAR', 'gender': 'UNKNOWN', 'animacy': 'ANIMATE', 'startIndex': 1, 'endIndex': 2, 'headIndex': 1, 'sentNum': 1, 'position': [1, 2], 'isRepresentativeMention': True}
{'id': 4, 'text': 'me', 'type': 'PRONOMINAL', 'number': 'SINGULAR', 'gender': 'UNKNOWN', 'animacy': 'ANIMATE', 'startIndex': 4, 'endIndex': 5, 'headIndex': 4, 'sentNum': 2, 'position': [2, 1], 'isRepresentativeMention': False}
Entity: 6
{'id': 2, 'text': 'My sister', 'type': 'NOMINAL', 'number': 'SINGULAR', 'gender': 'FEMALE', 'animacy': 'ANIMATE', 'startIndex': 1, 'endIndex': 3, 'headIndex': 2, 'sentNum': 1, 'position': [1, 3], 'isRepresentativeMention': True}
{'id': 6, 'text': 'She', 'type': 'PRONOMINAL', 'number': 'SINGULAR', 'gender': 'FEMALE', 'animacy': 'ANIMATE', 'startIndex': 1, 'endIndex': 2, 'headIndex': 1, 'sentNum': 3, 'position': [3, 1], 'isRepresentativeMention': False}
Entity: 7
{'id': 0, 'text': 'John', 'type': 'PROPER', 'number': 'SINGUL

In [14]:
# For every coreference chain print a separator line, then the position
# fields and surface text of each mention in the chain.
for chain in result_dict['corefs'].values():
    print('-----------')
    for mention in chain:
        # Numeric fields are converted with str() before concatenation.
        for field in ('sentNum', 'startIndex', 'endIndex'):
            print(field + ': ' + str(mention[field]))
        print('text: ' + mention['text'])
        print('########################')
    

-----------
sentNum: 1
startIndex: 1
endIndex: 2
text: My
########################
sentNum: 2
startIndex: 4
endIndex: 5
text: me
########################
-----------
sentNum: 1
startIndex: 1
endIndex: 3
text: My sister
########################
sentNum: 3
startIndex: 1
endIndex: 2
text: She
########################
-----------
sentNum: 1
startIndex: 7
endIndex: 8
text: John
########################
sentNum: 2
startIndex: 7
endIndex: 8
text: him
########################
sentNum: 3
startIndex: 3
endIndex: 4
text: he
########################
