In [1]:
#import nltk
#nltk.download('wordnet')
#nltk.download('punkt')

In [33]:
import nltk
from nltk.corpus import wordnet as wn
from nltk.tokenize import word_tokenize

## Word Senses
Words can have multiple meanings. WordNet organizes word senses into a structure called synsets.

Each word can have multiple synsets, each synset represents a different meaning of that word.

In [35]:
def get_senses(word):
    """
    Returns a list of senses (synsets) of a word
    """
    word_senses = wn.synsets(word)
    return word_senses

def get_definition(word_sense):
    return word_sense.definition()

def get_synonyms(word_sense):
    synonyms = []
    for lemma in word_sense.lemmas():
        synonym = lemma.name().replace('_', ' ')
        synonyms.append(synonym)
    return synonyms

## Hypernyms/Hyponyms
For example, red is a specific kind of color, or microbe is a kind of organism. These are example of hyponym relationships. If X is-a Y then X is a hyponym of Y, and Y is a hypernym of X. So red is a hyponym of color and color is a hypernym of red.

In WordNet, each word sense (synset) has its own hypernyms and hyponyms.

In [36]:
hyper = lambda s: s.hypernyms()
hypo = lambda s: s.hyponyms()

def get_hypernyms(word_sense, depth=5):
    return list(word_sense.closure(hyper, depth=depth))

def get_hyponyms(word_sense, depth=5):
    return list(word_sense.closure(hypo, depth=depth))

## Manually annotate Senses and Hypernyms/Hyponyms


In [41]:
wn.synsets("create")

[Synset('make.v.03'),
 Synset('create.v.02'),
 Synset('create.v.03'),
 Synset('create.v.04'),
 Synset('create.v.05'),
 Synset('produce.v.02')]

In [37]:
def annotate_synsets(sentences):
  """This function queries WordNet for each word in a list of sentences,
     and asks the user to input a number corresponding to the synset."""
  
  word_senses = {}
  # Cached selections maps from word string to the previous
  # selection for this word (an integer)
  cached_selections = {}

  for i, sent in enumerate(sentences):
    print("-----------\n%s\n----------" %sent.upper())
    words = word_tokenize(sent.lower())

    for word in words:
      synsets = wn.synsets(word)
      if len(synsets) != 0:
        selection = select_synset(sent, word, synsets, cached_selections)
        if selection != None:
          cached_selections[word] = selection
          if selection < len(synsets):
            s = synsets[selection]
            word_senses[word] = s.name()
      print()
  print("===")
  return word_senses


def select_synset(sent, word, synsets, cached_selections):
  """Ask the user to select which sense of the word  
     is being used in this sentence."""
  print(word.upper())

  prev_selection = -1
  if word in cached_selections:
    prev_selection = cached_selections[word]

  for choice, s in enumerate(synsets):
    if choice == prev_selection:
      print("*** ", end = '')
    print("%d) %s - %s" % (choice, s.name(), s.definition()))

  choice += 1
  if choice == prev_selection:
    print("*** ", end = '')
  print("%d) None of these." % choice)

  selection = -1
  while selection == -1:
    try:
      user_input = input(">")
      if user_input.strip() == 'x':
        # The user can press 'x' to exit.
        return None
      if user_input.strip() == '' and prev_selection > -1:
        # The user can press retrun to confirm the previous selection.
        return prev_selection
      selection = int(user_input)
    except:
      selection = -1
    if selection < 0 or selection > len(synsets):
      print("Please select a number between 0-%d, or type 'x' to exit" % len(synsets))
      if prev_selection > -1:
        print("You can also press return to confirm the previous selection (marked by ***).")
    else:
      return selection


def confirm_hyponyms(word, sysnset, do_hypernyms_instead=False):
  """Ask the user to confirm which of the hyponyms are applicable 
     for this sentence."""
  print("\n",word.upper())

  confirmed = []
  if do_hypernyms_instead:
    unconfirmed = sysnset.hypernyms()
  else:
    unconfirmed = sysnset.hyponyms()

  while len(unconfirmed) > 0:
    s = unconfirmed.pop(0)
    print("Is %s an appropriate substitute for %s? (y/n)" % (s.name(), word))
    print("It means:", s.definition())
    print("Synonyms are:", get_synonyms(s))
    user_input = ''
    while user_input == '':
      user_input = input(">")
      user_input = user_input.strip()
      if user_input == 'y' or user_input == 'yes':
        confirmed.append(s.name())
        if do_hypernyms_instead:
          unconfirmed.extend(s.hypernyms())
        else:
          unconfirmed.extend(s.hyponyms())
        
      elif user_input == 'n' or user_input == 'no':
        pass
      elif user_input == 'x':
        # The user can press 'x' to exit.
        return confirmed
      else:
        print("Please type 'yes' or 'no' or 'x' to stop confirming for this word")
        user_input = ''
      print()
  return confirmed

# Save your annotations to a file, so that you can submit them with your homework.
def save_to_drive(word_senses, confirmed_hyponyms, confirmed_hypernyms):
  import json
  from google.colab import drive
  drive.mount('/content/drive/')

  output_file = '/content/drive/My Drive/word-sense-annotations.json'
  output_json = {}
  output_json['senses'] = word_senses
  output_json['hyponyms'] = confirmed_hyponyms
  output_json['hypernyms'] = confirmed_hypernyms

  with open(output_file, 'w') as write_file:
    write_file.write(json.dumps(output_json, sort_keys=True, indent=4))
    write_file.write('\n')

#TODO: Sua loi huhu !!!!!!!!!!!!!!!!!
def save_to_file(word_senses, confirmed_hyponyms, confirmed_hypernyms):
  import json

  output_file = 'word-sense-annotations.json'
  output_json = {}
  output_json['senses'] = word_senses
  output_json['hyponyms'] = confirmed_hyponyms
  output_json['hypernyms'] = confirmed_hypernyms

  with open(output_file, 'w') as f:
      json.dump(output_json, f, ensure_ascii=False, sort_keys=True, indent=4)

def read_from_file():
    import json
    
    output_file = "word-sense-annotations.json"
    with open(output_file, 'r') as f:
        data = json.load(f)
        
    word_senses = data['senses']
    confirmed_hypernyms = data['hypernyms']
    confirmed_hyponyms = data['hyponyms']
    
    return word_senses, confirmed_hypernyms, confirmed_hyponyms

In [38]:
commands = [
    'give fish to troll',
    'go north'
]

In [39]:
# Test cell

print("ANNOTATING SYNSETS...")
word_senses = annotate_synsets(commands)
confirmed_hyponyms = {}
confirmed_hypernyms = {}

for word in word_senses:
    print("First, pick the word sense for the word '%s'..." %word.upper())
    word_sense = wn.synset(word_senses[word])
    print("Next, pick which hypernyms of %s we should allow players to use..." %word_sense.name().upper())
    confirmed_hypernyms[word] = confirm_hyponyms(word, word_sense, do_hypernyms_instead=True)
    print("Finally, pick which hyponyms of %s we should allow players to use..." %word_sense.name().upper())
    confirmed_hyponyms[word] = confirm_hyponyms(word, word_sense)

print("You've done annotating!")
#save_to_file(word_sense, confirmed_hyponyms, confirmed_hypernyms)


ANNOTATING SYNSETS...
-----------
GIVE FISH TO TROLL
----------
GIVE
0) give.n.01 - the elasticity of something that can be stretched and returns to its original length
1) give.v.01 - cause to have, in the abstract sense or physical sense
2) yield.v.01 - be the cause or source of
3) give.v.03 - transfer possession of something concrete or abstract to somebody
4) give.v.04 - convey or reveal information
5) give.v.05 - convey, as of a compliment, regards, attention, etc.; bestow
6) hold.v.03 - organize or be responsible for
7) give.v.07 - convey or communicate; of a smile, a look, a physical gesture
8) give.v.08 - give as a present; make a gift of
9) give.v.09 - cause to happen or be responsible for
10) give.v.10 - dedicate
11) render.v.04 - give or supply
12) impart.v.01 - transmit (knowledge or skills)
13) establish.v.05 - bring about
14) give.v.14 - leave with; give temporarily
15) give.v.15 - emit or utter
16) sacrifice.v.01 - endure the loss of
17) pass.v.05 - place into the hands o

> 3



FISH
0) fish.n.01 - any of various mostly cold-blooded aquatic vertebrates usually having scales and breathing through gills
1) fish.n.02 - the flesh of fish used as food
2) pisces.n.02 - (astrology) a person who is born while the sun is in Pisces
3) pisces.n.01 - the twelfth sign of the zodiac; the sun is in this sign from about February 19 to March 20
4) fish.v.01 - seek indirectly
5) fish.v.02 - catch or try to catch fish or shellfish
6) None of these.


> 0




TROLL
0) troll.n.01 - (Scandanavian folklore) a supernatural creature (either a dwarf or a giant) that is supposed to live in caves or in the mountains
1) round.n.11 - a partsong in which voices follow each other; one voice starts and others join in one after another until all are singing different parts of the song at the same time
2) troll.n.03 - a fisherman's lure that is used in trolling
3) troll.n.04 - angling by drawing a baited line through the water
4) troll.v.01 - circulate, move around
5) troll.v.02 - cause to move round and round
6) troll.v.03 - sing the parts of (a round) in succession
7) troll.v.04 - angle with a hook and line drawn through the water
8) troll.v.05 - sing loudly and without inhibition
9) troll.v.06 - praise or celebrate in song
10) troll.v.07 - speak or recite rapidly or in a rolling voice
11) None of these.


> 0



-----------
GO NORTH
----------
GO
0) go.n.01 - a time for working (after which you will be relieved by someone else)
1) adam.n.03 - street names for methylenedioxymethamphetamine
2) crack.n.09 - a usually brief attempt
3) go.n.04 - a board game for two players who place counters on a grid; the object is to surround and so capture the opponent's counters
4) travel.v.01 - change location; move, travel, or proceed, also metaphorically
5) go.v.02 - follow a procedure or take a course
6) go.v.03 - move away from a place into another direction
7) become.v.01 - enter or assume a certain state or condition
8) go.v.05 - be awarded; be allotted
9) run.v.05 - have a particular form
10) run.v.03 - stretch out over a distance, space, time, or scope; run or extend between two points or beyond a certain point
11) proceed.v.04 - follow a certain course
12) go.v.09 - be abolished or discarded
13) go.v.10 - be or continue to be in a certain condition
14) sound.v.02 - make a certain noise or sound
15) 

> 5



NORTH
0) north.n.01 - the region of the United States lying to the north of the Mason-Dixon line
1) union.n.02 - the United States (especially the northern states during the American Civil War)
2) north.n.03 - the cardinal compass point that is at 0 or 360 degrees
3) north.n.04 - a location in the northern part of a country, region, or city
4) north.n.05 - the direction corresponding to the northward cardinal compass point
5) north.n.06 - the direction in which a compass needle points
6) north.n.07 - British statesman under George III whose policies led to rebellion in the American colonies (1732-1792)
7) north.a.01 - situated in or facing or moving toward or coming from the north
8) north.r.01 - in a northern direction
9) None of these.


> 4



===
First, pick the word sense for the word 'GIVE'...
Next, pick which hypernyms of GIVE.V.03 we should allow players to use...

 GIVE
Is transfer.v.05 an appropriate substitute for give? (y/n)
It means: cause to change ownership
Synonyms are: ['transfer']


> n



Finally, pick which hyponyms of GIVE.V.03 we should allow players to use...

 GIVE
Is accord.v.02 an appropriate substitute for give? (y/n)
It means: allow to have
Synonyms are: ['accord', 'allot', 'grant']


> x


First, pick the word sense for the word 'FISH'...
Next, pick which hypernyms of FISH.N.01 we should allow players to use...

 FISH
Is aquatic_vertebrate.n.01 an appropriate substitute for fish? (y/n)
It means: animal living wholly or chiefly in or on water
Synonyms are: ['aquatic vertebrate']


> n



Finally, pick which hyponyms of FISH.N.01 we should allow players to use...

 FISH
Is bony_fish.n.01 an appropriate substitute for fish? (y/n)
It means: any fish of the class Osteichthyes
Synonyms are: ['bony fish']


> n



Is bottom-feeder.n.02 an appropriate substitute for fish? (y/n)
It means: a fish that lives and feeds on the bottom of a body of water
Synonyms are: ['bottom-feeder', 'bottom-dweller']


> n



Is bottom_lurkers.n.01 an appropriate substitute for fish? (y/n)
It means: a fish that lurks on the bottom of a body of water
Synonyms are: ['bottom lurkers']


> x


First, pick the word sense for the word 'TROLL'...
Next, pick which hypernyms of TROLL.N.01 we should allow players to use...

 TROLL
Is mythical_monster.n.01 an appropriate substitute for troll? (y/n)
It means: a monster renowned in folklore and myth
Synonyms are: ['mythical monster', 'mythical creature']


> y



Is monster.n.01 an appropriate substitute for troll? (y/n)
It means: an imaginary creature usually having various human and animal parts
Synonyms are: ['monster']


> y



Is mythical_being.n.01 an appropriate substitute for troll? (y/n)
It means: an imaginary being of myth or fable
Synonyms are: ['mythical being']


> n



Is imaginary_being.n.01 an appropriate substitute for troll? (y/n)
It means: a creature of the imagination; a person that exists only in legends or myths or fiction
Synonyms are: ['imaginary being', 'imaginary creature']


> x


Finally, pick which hyponyms of TROLL.N.01 we should allow players to use...

 TROLL
First, pick the word sense for the word 'GO'...
Next, pick which hypernyms of GO.V.02 we should allow players to use...

 GO
Is act.v.01 an appropriate substitute for go? (y/n)
It means: perform an action, or work out or perform (an action)
Synonyms are: ['act', 'move']


> n



Finally, pick which hyponyms of GO.V.02 we should allow players to use...

 GO
Is steamroller.v.02 an appropriate substitute for go? (y/n)
It means: proceed with great force
Synonyms are: ['steamroller', 'steamroll']


> n



Is venture.v.01 an appropriate substitute for go? (y/n)
It means: proceed somewhere despite the risk of possible dangers
Synonyms are: ['venture', 'embark']


> y



Is work.v.09 an appropriate substitute for go? (y/n)
It means: proceed towards a goal or along a path or through an activity
Synonyms are: ['work']


> n



First, pick the word sense for the word 'NORTH'...
Next, pick which hypernyms of NORTH.N.05 we should allow players to use...

 NORTH
Is direction.n.02 an appropriate substitute for north? (y/n)
It means: the spatial relation between something and the course along which it points or moves
Synonyms are: ['direction']


> n



Finally, pick which hyponyms of NORTH.N.05 we should allow players to use...

 NORTH
You've done annotating!
Saving your annotation to local file ('word-sense-annotations.json')...


> x


In [42]:
import json

output_file = 'word-sense-annotations.json'
output_json = {}
output_json['senses'] = word_senses
output_json['hyponyms'] = confirmed_hyponyms
output_json['hypernyms'] = confirmed_hypernyms

print("Saving your annotation to local file ('word-sense-annotations.json')...")
with open(output_file, 'w') as f:
    json.dump(output_json, f, ensure_ascii=False, sort_keys=True, indent=4)

Saving your annotation to local file ('word-sense-annotations.json')...


### Look over Annotations

In [43]:
# read from file
(word_senses, confirmed_hypernyms, confirmed_hyponyms) = read_from_file()

In [44]:
word_senses

{'fish': 'fish.n.01',
 'give': 'give.v.03',
 'go': 'go.v.02',
 'north': 'north.n.05',
 'troll': 'troll.n.01'}

In [45]:
for word in word_senses:
    print(word.upper())
    word_sense = wn.synset(word_senses[word])
    print("Synonyms:", get_synonyms(word_sense))
    
    print("Hypernyms:",)
    for hypernym in confirmed_hypernyms[word]:
        print("\t", get_synonyms(wn.synset(hypernym)))
    
    print("Hyponyms:",)
    hyponyms = confirmed_hyponyms[word]
    for hyponym in hyponyms:
        print("\t", get_synonyms(wn.synset(hyponym)))
    print("---")

FISH
Synonyms: ['fish']
Hypernyms:
Hyponyms:
---
GIVE
Synonyms: ['give']
Hypernyms:
Hyponyms:
---
GO
Synonyms: ['go', 'proceed', 'move']
Hypernyms:
Hyponyms:
	 ['venture', 'embark']
---
NORTH
Synonyms: ['north']
Hypernyms:
Hyponyms:
---
TROLL
Synonyms: ['troll']
Hypernyms:
	 ['mythical monster', 'mythical creature']
	 ['monster']
Hyponyms:
---


In [46]:
confirmed_hyponyms

{'fish': [], 'give': [], 'go': ['venture.v.01'], 'north': [], 'troll': []}

## Enumnerate Alternative Wordings of Commands
Ouput a set of reasonably accurate paraphrases for the commands in our game

In [51]:
import itertools

def get_alternatives(word, word_senses, confirmed_hypernyms, confirmed_hyponyms):
    """
    Create a list of reasonable alternative for a word by listing out the synonyms for its word sense, and for its hyponyms and hypernyms
    """
    alternatives = []
    if not word in word_senses:
        alternatives.append(word)
        return alternatives
    
    word_sense = wn.synset(word_senses[word])
    alternatives.extend(get_synonyms(word_sense))
    for hypernym in confirmed_hypernyms[word]:
        alternatives.extend(get_synonyms(wn.synset(hypernym)))
    for hyponym in confirmed_hyponyms[word]:
        alternatives.extend(get_synonyms(wn.synset(hyponym)))
    return alternatives


def enumerate_alternatives(sentence, word_senses, confirmed_hypernyms, confirmed_hyponyms):
    """
    Enumerate all of the sentences that can result
    by taking any combination of the alternates for each word in the sentence
    """
    words = word_tokenize(sentence.lower())
    # 2-D list
    alternatives_per_word = []
    for word in words:
        alternatives = get_alternatives(word, word_senses, confirmed_hypernyms, confirmed_hyponyms)
        alternatives_per_word.append(alternatives)
    
    # combination of 2-D lists
    alternative_to_original = {}
    for word in list(itertools.product(*alternatives_per_word)):
        alt_sent = " ".join(words)
        alternative_to_original[alt_sent] = sentence
    return alternative_to_original

In [50]:
alternative_commands = {}
for command in commands:
    alternative_commands.update(enumerate_alternatives(command,
                                                        word_senses,
                                                        confirmed_hypernyms,
                                                        confirmed_hyponyms))

for alt_sent in alternative_commands:
    print("%s => %s" %(alt_sent, alternative_commands[alt_sent]))
print("Congratulations, you can now handle %d commands instead of just %d!" %(len(alternative_commands.keys()), len(commands)))

give fish to troll => give fish to troll
go north => go north
Congratulations, you can now handle 2 commands instead of just 2!
