In [1]:
import json
from stanfordcorenlp import StanfordCoreNLP

# Start server before running
# java -mx4G -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout  15000
# Connect to server
host = 'http://localhost'
port = 9000
nlp = StanfordCoreNLP(host, port=port,timeout=15000)

# Triplets

In [5]:
s='Twenty percent electric motors are pulled from an assembly line'
s1='Brack Obama was born in Hawaii'
# s = input('Sentence: ')

output = nlp.annotate(s1, properties={"annotators":"tokenize,ssplit,pos,depparse,natlog,openie",
                                    "outputFormat": "json",
                                    "openie.triple.strict":"true",
                                    "openie.max_entailments_per_clause":"1"})
print(output)
output = json.loads(output)

{
  "sentences": [
    {
      "index": 0,
      "basicDependencies": [
        {
          "dep": "ROOT",
          "governor": 0,
          "governorGloss": "ROOT",
          "dependent": 4,
          "dependentGloss": "born"
        },
        {
          "dep": "compound",
          "governor": 2,
          "governorGloss": "Obama",
          "dependent": 1,
          "dependentGloss": "Brack"
        },
        {
          "dep": "nsubjpass",
          "governor": 4,
          "governorGloss": "born",
          "dependent": 2,
          "dependentGloss": "Obama"
        },
        {
          "dep": "auxpass",
          "governor": 4,
          "governorGloss": "born",
          "dependent": 3,
          "dependentGloss": "was"
        },
        {
          "dep": "case",
          "governor": 6,
          "governorGloss": "Hawaii",
          "dependent": 5,
          "dependentGloss": "in"
        },
        {
          "dep": "nmod",
          "governor": 4,
          "governor

In [9]:
print("The subject, object and verb/relation of the given sentence are")
for openie_result in output["sentences"]:
    triplets = openie_result['openie']
    for triplet in triplets:
        print(triplet['subject'] + ' -> ' + triplet['relation'] + ' -> ' + triplet['object'])

The subject, object and verb/relation of the given sentence are
Brack Obama -> was born in -> Hawaii
Brack Obama -> was -> born


# WordNet
Ref: **Dive into WordNet with NLTK** on [Medium](https://medium.com/parrot-prediction/dive-into-wordnet-with-nltk-b313c480e788) by _[Norbert Kozlowski](https://medium.com/@don_khozzy)_

In [10]:
from nltk.corpus import wordnet as wn
# unambiguous word
wn.synsets('motorcar')

[Synset('car.n.01')]

In [11]:
wn.synset('car.n.01').lemma_names()

['car', 'auto', 'automobile', 'machine', 'motorcar']

In [12]:
# ambiguous word
wn.synsets('printer')

[Synset('printer.n.01'), Synset('printer.n.02'), Synset('printer.n.03')]

In [14]:
for synset in wn.synsets('printer'):
    print("\tLemma: {}".format(synset.name()))
    print("\tDefinition: {}".format(synset.definition()))
    print("\tExample: {}".format(synset.examples()))
    print()

	Lemma: printer.n.01
	Definition: someone whose occupation is printing
	Example: []

	Lemma: printer.n.02
	Definition: (computer science) an output device that prints the results of data processing
	Example: []

	Lemma: printer.n.03
	Definition: a machine that prints
	Example: []



In [16]:
for synset in wn.synsets('printer'):
    print(synset.name(), ' -> ', synset.lemmas())

printer.n.01  ->  [Lemma('printer.n.01.printer'), Lemma('printer.n.01.pressman')]
printer.n.02  ->  [Lemma('printer.n.02.printer')]
printer.n.03  ->  [Lemma('printer.n.03.printer'), Lemma('printer.n.03.printing_machine')]


## Hyponym — a more specific concept
lower in the hierarchy (machine -> printer -> **typesetting_machine**)

In [17]:
machine_that_prints = wn.synset('printer.n.03')
sorted([lemma.name() for synset in machine_that_prints.hyponyms() for lemma in synset.lemmas()])

['Addressograph',
 'addressing_machine',
 'character-at-a-time_printer',
 'character_printer',
 'electrostatic_printer',
 'impact_printer',
 'line-at-a-time_printer',
 'line_printer',
 'page-at-a-time_printer',
 'page_printer',
 'printer',
 'serial_printer',
 'thermal_printer',
 'typesetting_machine']

## Hypernym — a more general concept.
higher in the hierarchy (typesetting_machine -> printer -> **machine**)

In [19]:
tree = wn.synset('tree.n.01')
# obtain parts
print(tree.part_meronyms())
# obtain substances
print(tree.substance_meronyms())

[Synset('burl.n.02'), Synset('crown.n.07'), Synset('limb.n.02'), Synset('stump.n.01'), Synset('trunk.n.01')]
[Synset('heartwood.n.01'), Synset('sapwood.n.01')]


## Holonym — denotes a membership to something
engine -> car

In [21]:
print(wn.synset('atom.n.01').part_holonyms())
 
print(wn.synset('hydrogen.n.01').substance_holonyms())

[Synset('chemical_element.n.01'), Synset('molecule.n.01')]
[Synset('water.n.01')]


## Meronym — denotes a part of something
plant_part -> plant

In [23]:
tree = wn.synset('tree.n.01')
 
print(tree.part_meronyms())
 
print(tree.substance_meronyms())

[Synset('burl.n.02'), Synset('crown.n.07'), Synset('limb.n.02'), Synset('stump.n.01'), Synset('trunk.n.01')]
[Synset('heartwood.n.01'), Synset('sapwood.n.01')]


## Entailment — denotes how verbs are involved

In [24]:
wn.synset('eat.v.01').entailments()

[Synset('chew.v.01'), Synset('swallow.v.01')]

In [25]:
## Similarity

In [26]:
truck = wn.synset('truck.n.01')
limousine = wn.synset('limousine.n.01')
 
truck.lowest_common_hypernyms(limousine)

[Synset('motor_vehicle.n.01')]

In [29]:
print(wn.synset('entity.n.01').min_depth())

print(wn.synset('car.n.01').min_depth())

print(wn.synset('horse.n.01').min_depth())

print(wn.synset('mare.n.01').min_depth())

0
10
14
15


In [28]:
train = wn.synset('train.n.01')
horse = wn.synset('horse.n.01')
animal = wn.synset('animal.n.01')
atom = wn.synset('atom.n.01')
 
print("Train => Horse: {}".format(train.path_similarity(horse)))
print("Horse => Train: {}".format(horse.path_similarity(train)))
print("Horse => Animal: {}".format(horse.path_similarity(animal)))
print("Train => Atom: {}".format(train.path_similarity(atom)))

Train => Horse: 0.058823529411764705
Horse => Train: 0.058823529411764705
Horse => Animal: 0.1111111111111111
Train => Atom: 0.09090909090909091
