In [3]:
from stanfordnlp.server import CoreNLPClient
import os

# Sample document annotated by the demo cells that follow.
text = "Chris Manning is a nice person. Chris wrote a simple sentence. He also gives oranges to people."
print('starting up Java Stanford CoreNLP Server...')

# Location of the CoreNLP distribution. setdefault keeps a CORENLP_HOME that
# is already set in the environment and only falls back to this
# machine-specific path when nothing has been configured.
os.environ.setdefault("CORENLP_HOME", '/pi/stanford-corenlp-full-2018-10-05')

# Create the client with the full annotator pipeline used by this notebook.
client = CoreNLPClient(
    annotators=['tokenize', 'ssplit', 'pos', 'lemma', 'ner', 'parse', 'depparse', 'coref'],
    timeout=60000,
    memory='16G',
)

starting up Java Stanford CoreNLP Server...


In [4]:
# Send the sample text to the CoreNLP server for annotation.
ann = client.annotate(text)

# Work with the first sentence of the annotated document.
sentence = ann.sentence[0]

# Constituency parse of that sentence.
print('---', 'constituency parse of first sentence', sep='\n')
constituency_parse = sentence.parseTree
print(constituency_parse)

# First child subtree of the constituency parse.
print('---', 'first subtree of constituency parse', sep='\n')
print(constituency_parse.child[0])

# The value stored on that first subtree.
print('---', 'value of first subtree of constituency parse', sep='\n')
print(constituency_parse.child[0].value)

# Basic dependency parse of the same sentence.
print('---', 'dependency parse of first sentence', sep='\n')
dependency_parse = sentence.basicDependencies
print(dependency_parse)

Starting server with command: java -Xmx16G -cp /pi/stanford-corenlp-full-2018-10-05/* edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 60000 -threads 5 -maxCharLength 100000 -quiet True -serverProperties corenlp_server-17432efeeaa445d2.props -preload tokenize,ssplit,pos,lemma,ner,parse,depparse,coref
---
constituency parse of first sentence
child {
  child {
    child {
      child {
        value: "Chris"
      }
      value: "NNP"
    }
    child {
      child {
        value: "Manning"
      }
      value: "NNP"
    }
    value: "NP"
  }
  child {
    child {
      child {
        value: "is"
      }
      value: "VBZ"
    }
    child {
      child {
        child {
          value: "a"
        }
        value: "DT"
      }
      child {
        child {
          value: "nice"
        }
        value: "JJ"
      }
      child {
        child {
          value: "person"
        }
        value: "NN"
      }
      value: "NP"
    }
    value: "VP"
  }
  child {
    

In [5]:
# Show the first token of the first sentence.
print('---', 'first token of first sentence', sep='\n')
token = sentence.token[0]
print(token)

---
first token of first sentence
word: "Chris"
pos: "NNP"
value: "Chris"
before: ""
after: " "
originalText: "Chris"
ner: "PERSON"
lemma: "Chris"
beginChar: 0
endChar: 5
utterance: 0
speaker: "PER0"
tokenBeginIndex: 0
tokenEndIndex: 1
hasXmlContext: false
isNewline: false
coarseNER: "PERSON"
fineGrainedNER: "PERSON"
corefMentionIndex: 0
entityMentionIndex: 0



In [6]:
# Print the part-of-speech tag of the token.
# (A bare `token.pos` expression used to sit before the print; it was not the
# cell's last expression, so its value was computed and thrown away.)
print('---')
print('part of speech tag of token')
print(token.pos)

---
part of speech tag of token
NNP


In [7]:
# Show the named-entity tag of the token.
print('---', 'named entity tag of token', sep='\n')
print(token.ner)


---
named entity tag of token
PERSON


In [8]:
# Show the first entity mention found in the first sentence.
print('---', 'first entity mention in sentence', sep='\n')
print(sentence.mentions[0])

# Show the coreference chains of the whole annotated document.
print('---', 'coref chains for the example', sep='\n')
print(ann.corefChain)

---
first entity mention in sentence
sentenceIndex: 0
tokenStartInSentenceInclusive: 0
tokenEndInSentenceExclusive: 2
ner: "PERSON"
entityType: "PERSON"
entityMentionIndex: 0
canonicalEntityMentionIndex: 0
entityMentionText: "Chris Manning"

---
coref chains for the example
[chainID: 5
mention {
  mentionID: 0
  mentionType: "PROPER"
  number: "SINGULAR"
  gender: "MALE"
  animacy: "ANIMATE"
  beginIndex: 0
  endIndex: 2
  headIndex: 1
  sentenceIndex: 0
  position: 1
}
mention {
  mentionID: 2
  mentionType: "PROPER"
  number: "SINGULAR"
  gender: "MALE"
  animacy: "ANIMATE"
  beginIndex: 0
  endIndex: 1
  headIndex: 0
  sentenceIndex: 1
  position: 1
}
mention {
  mentionID: 5
  mentionType: "PRONOMINAL"
  number: "SINGULAR"
  gender: "MALE"
  animacy: "ANIMATE"
  beginIndex: 0
  endIndex: 1
  headIndex: 0
  sentenceIndex: 2
  position: 2
}
representative: 0
]


In [9]:
# Use tokensregex patterns to find who wrote a sentence.
pattern = '([ner: PERSON]+) /wrote/ /an?/ []{0,3} /sentence|article/'
matches = client.tokensregex(text, pattern)
# sentences contains a list with matches for each sentence.
assert len(matches["sentences"]) == 3
# length tells you whether or not there are any matches in this sentence.
assert matches["sentences"][1]["length"] == 1
# You can access matches like most regex groups.
# This check is asserted: a bare comparison that is not the cell's last
# expression is evaluated and silently discarded, so it could never fail.
assert matches["sentences"][1]["0"]["text"] == "Chris wrote a simple sentence"
# Kept as the cell's final expression so its value is shown as the cell output.
matches["sentences"][1]["0"]["1"]["text"] == "Chris"

True

In [11]:
# Use semgrex patterns to directly find who wrote what.
pattern = '{word:wrote} >nsubj {}=subject >dobj {}=object'
matches = client.semgrex(text, pattern)
# sentences contains a list with matches for each sentence.
assert len(matches["sentences"]) == 3
# length tells you whether or not there are any matches in this sentence.
assert matches["sentences"][1]["length"] == 1
# You can access matches like most regex groups; named nodes from the pattern
# are looked up with a "$" prefix.
# These checks are asserted: a bare comparison that is not the cell's last
# expression is evaluated and silently discarded, so it could never fail.
assert matches["sentences"][1]["0"]["text"] == "wrote"
assert matches["sentences"][1]["0"]["$subject"]["text"] == "Chris"
# Kept as the cell's final expression so its value is shown as the cell output.
matches["sentences"][1]["0"]["$object"]["text"] == "sentence"

True

## Register a set of properties in the client’s `properties_cache`, then select them by key
```python
# Custom French pipeline settings: annotator list, tokenizer language,
# POS-tagger and parser model paths, and plain-text output format.
FRENCH_CUSTOM_PROPS = {'annotators': 'tokenize,ssplit,pos,parse', 'tokenize.language': 'fr',
                       'pos.model': 'edu/stanford/nlp/models/pos-tagger/french/french.tagger',
                       'parse.model': 'edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz',
                       'outputFormat': 'text'}

# Register the properties under the key 'fr-custom', then annotate by
# referring to that key instead of passing the whole dictionary.
with CoreNLPClient(annotators='tokenize,ssplit,pos') as client:
    client.register_properties_key('fr-custom', FRENCH_CUSTOM_PROPS)
    ann = client.annotate(text, properties_key='fr-custom')

# Set request properties as a Python dictionary
# NOTE(review): this call sits after the `with` block has exited and reuses
# its `client` -- confirm the client is still usable at this point, or treat
# this line as a separate snippet that needs its own running client.
ann = client.annotate(text, properties=FRENCH_CUSTOM_PROPS)
```

Specify a language supported by Stanford CoreNLP:

```python
ann = client.annotate(text, properties='german')
```

In [17]:
def disp_tree(text, token_indices=(4, 2), nlp_client=None):
    """Print the basic dependency parse and selected tokens of the first sentence.

    Args:
        text: Raw text to annotate. Only its first sentence is displayed.
        token_indices: Positions, within the first sentence, of the tokens to
            print, in the order given. Defaults to (4, 2), the two positions
            this helper originally hard-coded.
        nlp_client: Object whose ``annotate(text)`` returns the annotation.
            When omitted, the notebook-level ``client`` is used.

    Returns:
        None. Everything is written with ``print``.
    """
    # Only touch the notebook global when no client was passed explicitly.
    active_client = client if nlp_client is None else nlp_client
    ann = active_client.annotate(text)

    # Indexing sentence[0] would raise if the annotation holds no sentences.
    if len(ann.sentence) == 0:
        print('(no sentences found in text)')
        return

    sentence = ann.sentence[0]
    print(sentence.basicDependencies)

    for index in token_indices:
        try:
            token = sentence.token[index]
        except IndexError:
            # A short sentence may not have a token at every requested index;
            # report it and carry on with the remaining indices.
            print('(no token at index {}; sentence has {} tokens)'.format(
                index, len(sentence.token)))
            continue
        print(token)

# Demo: run the helper on a short example sentence.
disp_tree('The car is red.')

node {
  sentenceIndex: 0
  index: 1
}
node {
  sentenceIndex: 0
  index: 2
}
node {
  sentenceIndex: 0
  index: 3
}
node {
  sentenceIndex: 0
  index: 4
}
node {
  sentenceIndex: 0
  index: 5
}
edge {
  source: 2
  target: 1
  dep: "det"
  isExtra: false
  sourceCopy: 0
  targetCopy: 0
  language: UniversalEnglish
}
edge {
  source: 4
  target: 2
  dep: "nsubj"
  isExtra: false
  sourceCopy: 0
  targetCopy: 0
  language: UniversalEnglish
}
edge {
  source: 4
  target: 3
  dep: "cop"
  isExtra: false
  sourceCopy: 0
  targetCopy: 0
  language: UniversalEnglish
}
edge {
  source: 4
  target: 5
  dep: "punct"
  isExtra: false
  sourceCopy: 0
  targetCopy: 0
  language: UniversalEnglish
}
root: 4

word: "."
pos: "."
value: "."
before: ""
after: ""
originalText: "."
ner: "O"
lemma: "."
beginChar: 14
endChar: 15
utterance: 0
speaker: "PER0"
tokenBeginIndex: 4
tokenEndIndex: 5
hasXmlContext: false
isNewline: false
coarseNER: "O"
fineGrainedNER: "O"

word: "is"
pos: "VBZ"
value: "is"
before: 