In [1]:
import nltk
nltk.download('wordnet')
nltk.download('punkt')

[nltk_data] Downloading package wordnet to /home/ngfuong/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /home/ngfuong/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [2]:
from nltk.corpus import wordnet as wn
from nltk.tokenize import word_tokenize

## Word Senses
Words can have multiple meanings. WordNet organizes word senses into a structure called synsets.

Each word can have multiple synsets, each synset represents a different meaning of that word.

In [3]:
def get_senses(word):
    """
    Returns a list of senses (synsets) of a word
    """
    word_senses = wn.synsets(word)
    return word_senses

def get_definition(word_sense):
    return word_sense.definition()

def get_synonyms(word_sense):
    synonyms = []
    for lemma in word_sense.lemmas():
        synonym = lemma.name().replace('_', ' ')
        synonyms.append(synonym)
    return synonyms

# Example: bug
word_senses = get_senses("bug")
for i, word_sense in enumerate(word_senses):
    print("Sense %d: %s" %(i, word_sense.name()))
    print("Definition:", get_definition(word_sense))
    print("Synonyms:", get_synonyms(word_sense))
    print()

Sense 0: bug.n.01
Definition: general term for any insect or similar creeping or crawling invertebrate
Synonyms: ['bug']

Sense 1: bug.n.02
Definition: a fault or defect in a computer program, system, or machine
Synonyms: ['bug', 'glitch']

Sense 2: bug.n.03
Definition: a small hidden microphone; for listening secretly
Synonyms: ['bug']

Sense 3: hemipterous_insect.n.01
Definition: insects with sucking mouthparts and forewings thickened and leathery at the base; usually show incomplete metamorphosis
Synonyms: ['hemipterous insect', 'bug', 'hemipteran', 'hemipteron']

Sense 4: microbe.n.01
Definition: a minute life form (especially a disease-causing bacterium); the term is not in technical use
Synonyms: ['microbe', 'bug', 'germ']

Sense 5: tease.v.01
Definition: annoy persistently
Synonyms: ['tease', 'badger', 'pester', 'bug', 'beleaguer']

Sense 6: wiretap.v.01
Definition: tap a telephone or telegraph wire to get information
Synonyms: ['wiretap', 'tap', 'intercept', 'bug']



## Hypernyms/Hyponyms
For example, red is a specific kind of color, or microbe is a kind of organism. These are example of hyponym relationships. If X is-a Y then X is a hyponym of Y, and Y is a hypernym of X. So red is a hyponym of color and color is a hypernym of red.

In WordNet, each word sense (synset) has its own hypernyms and hyponyms.

In [4]:
hyper = lambda s: s.hypernyms()
hypo = lambda s: s.hyponyms()

def get_hypernyms(word_sense, depth=5):
    return list(word_sense.closure(hyper, depth=depth))

def get_hyponyms(word_sense, depth=5):
    return list(word_sense.closure(hypo, depth=depth))

# Example: bug
word_senses = get_senses("bug")
for i, word_sense in enumerate(word_senses):
    """
    The synset names include a word from the set of synonyms,
    plus a POS (n for noun, v for verb) and
    the number of the sense (sense 01 is the most common sense)
    """
    print("\nSense %d: %s (%s)" %(i, word_sense.name(), get_definition(word_sense)))
    print("Hypernyms:")
    hypernyms = word_sense.hypernyms()
    while len(hypernyms)>0:
        print(word_sense.name(),"\tis a\t",hypernyms[0].name())
        word_sense = hypernyms[0]
        hypernyms = word_sense.hypernyms()


Sense 0: bug.n.01 (general term for any insect or similar creeping or crawling invertebrate)
Hypernyms:
bug.n.01 	is a	 insect.n.01
insect.n.01 	is a	 arthropod.n.01
arthropod.n.01 	is a	 invertebrate.n.01
invertebrate.n.01 	is a	 animal.n.01
animal.n.01 	is a	 organism.n.01
organism.n.01 	is a	 living_thing.n.01
living_thing.n.01 	is a	 whole.n.02
whole.n.02 	is a	 object.n.01
object.n.01 	is a	 physical_entity.n.01
physical_entity.n.01 	is a	 entity.n.01

Sense 1: bug.n.02 (a fault or defect in a computer program, system, or machine)
Hypernyms:
bug.n.02 	is a	 defect.n.03
defect.n.03 	is a	 imperfection.n.01
imperfection.n.01 	is a	 state.n.02
state.n.02 	is a	 attribute.n.02
attribute.n.02 	is a	 abstraction.n.06
abstraction.n.06 	is a	 entity.n.01

Sense 2: bug.n.03 (a small hidden microphone; for listening secretly)
Hypernyms:
bug.n.03 	is a	 microphone.n.01
microphone.n.01 	is a	 electro-acoustic_transducer.n.01
electro-acoustic_transducer.n.01 	is a	 transducer.n.01
transducer.

## Manually annotate Senses and Hypernyms/Hyponyms


In [5]:
def annotate_synsets(sentences):
  """This function queries WordNet for each word in a list of sentences,
     and asks the user to input a number corresponding to the synset."""
  
  print(sent.upper())
  word_senses = {}
  # Cached selections maps from word string to the previous
  # selection for this word (an integer)
  cached_selections = {}

  for i, sent in enumerate(sentences):
    words = word_tokenize(sent.lower())

    for word in words:
      sysnsets = wn.synsets(word)
      if len(sysnsets) != 0:
        selection = select_synset(sent, word, sysnsets, cached_selections)
        if selection != None:
          cached_selections[word] = selection
          if selection < len(sysnsets):
            s = sysnsets[selection]
            word_senses[word] = s.name()
        print()
  print("---")
  return word_senses


def select_synset(sent, word, sysnsets, cached_selections):
  """Ask the user to select which sense of the word  
     is being used in this sentence."""
  print(word.upper())

  prev_selection = -1
  if word in cached_selections:
    prev_selection = cached_selections[word]

  for choice, s in enumerate(sysnsets):
    if choice == prev_selection:
      print("*** ", end = '')
    print("%d) %s - %s" % (choice, s.name(), s.definition()))

  choice += 1
  if choice == prev_selection:
    print("*** ", end = '')
  print("%d) None of these." % choice)

  selection = -1
  while selection == -1:
    try:
      user_input = input(">")
      if user_input.strip() == 'x':
        # The user can press 'x' to exit.
        return None
      if user_input.strip() == '' and prev_selection > -1:
        # The user can press retrun to confirm the previous selection.
        return prev_selection
      selection = int(user_input)
    except:
      selection = -1
    if selection < 0 or selection > len(sysnsets):
      print("Please select a number between 0-%d, or type 'x' to exit" % len(sysnsets))
      if prev_selection > -1:
        print("You can also press return to confirm the previous selection (marked by ***).")
    else:
      return selection


def confirm_hyponyms(word, sysnset, do_hypernyms_instead=False):
  """Ask the user to confirm which of the hyponyms are applicable 
     for this sentence."""
  print("\n",word.upper())

  confirmed = []
  if do_hypernyms_instead:
    unconfirmed = sysnset.hypernyms()
  else:
    unconfirmed = sysnset.hyponyms()

  while len(unconfirmed) > 0:
    s = unconfirmed.pop(0)
    print("Is %s an appropriate substitute for %s? (y/n)" % (s.name(), word))
    print("It means:", s.definition())
    print("Synonyms are:", get_synonyms(s))
    user_input = ''
    while user_input == '':
      user_input = input(">")
      user_input = user_input.strip()
      if user_input == 'y' or user_input == 'yes':
        confirmed.append(s.name())
        if do_hypernyms_instead:
          unconfirmed.extend(s.hypernyms())
        else:
          unconfirmed.extend(s.hyponyms())
        
      elif user_input == 'n' or user_input == 'no':
        pass
      elif user_input == 'x':
        # The user can press 'x' to exit.
        return confirmed
      else:
        print("Please type 'yes' or 'no' or 'x' to stop confirming for this word")
        user_input = ''
  return confirmed

# Save your annotations to a file, so that you can submit them with your homework.
def save_to_drive(word_senses, confirmed_hyponyms, confirmed_hypernyms):
  import json
  from google.colab import drive
  drive.mount('/content/drive/')

  output_file = '/content/drive/My Drive/word-sense-annotations.json'
  output_json = {}
  output_json['senses'] = word_senses
  output_json['hyponyms'] = confirmed_hyponyms
  output_json['hypernyms'] = confirmed_hypernyms

  with open(output_file, 'w') as write_file:
    write_file.write(json.dumps(output_json, sort_keys=True, indent=4))
    write_file.write('\n')

#TODO: Sua loi huhu !!!!!!!!!!!!!!!!!
def save_to_file(word_senses, confirmed_hyponyms, confirmed_hypernyms):
  import json

  output_file = 'word-sense-annotations.json'
  output_json = {}
  output_json['senses'] = word_senses
  output_json['hyponyms'] = confirmed_hyponyms
  output_json['hypernyms'] = confirmed_hypernyms

  with open(output_file, 'w') as f:
      json.dump(output_json, f, ensure_ascii=False, sort_keys=True, indent=4)

In [6]:
commands = [
    'pick rose',
    'catch fish'
]

In [7]:
# Test cell
#TODO: SUA LOI HUHU
print("ANNOTATING SYNSETS...")
word_senses = annotate_synsets(commands)
confirmed_hyponyms = {}
confirmed_hypernyms = {}

for word in word_senses:
    print("First, pick the word sense for the word '%s'" %word)
    print("==========")
    word_sense = wn.synset(word_senses[word])
    print("Next, pick which hypernyms of %s we should allow players to use." %word_sense.name())
    print("==========")
    confirmed_hypernyms[word] = confirm_hyponyms(word, word_sense, do_hypernyms_instead=True)
    print("Finally, pick which hyponyms of %s we should allow players to use." %word_sense.name())
    print('==========')
    confirmed_hyponyms[word] = confirm_hyponyms(word, word_sense)

print("You've done annotating!")
print("Saving your annotation to local file ('word-sense-annotations.json')...")
save_to_file(word_sense, confirmed_hyponyms, confirmed_hypernyms)

ANNOTATING SYNSETS...


UnboundLocalError: local variable 'sent' referenced before assignment

### Look over Annotations

In [18]:
for word in word_senses:
    print(word.upper())
    word_sense = wn.synset(word_senses[word])
    print("Synonyms:", get_synonyms(word_sense))
    
    print("Hypernyms:",)
    for hypernym in confirmed_hypernyms:
        print("\t", get_synonyms(wn.synset(hypernym)))
    
    print("Hyponyms:",)
    hyponyms = confirmed_hyponyms[word]
    for hyponym in hyponyms:
        print("\t", get_synonyms(wn.synset(hyponym)))
    print("---")

WEAR
Synonyms: ['wear', 'have on']
Hypernyms:
Hyponyms:
---
CROWN
Synonyms: ['Crown']
Hypernyms:
Hyponyms:
---


## Enumnerate Alternative Wordings of Commands
Ouput a set of reasonably accurate paraphrases for the commands in our game

In [9]:
import itertools

def get_alternatives(word, word_senses, confirmed_hypernyms, confirmed_hyponyms):
    """
    Create a list of reasonable alternative for a word by listing out the synonyms for its word sense, and for its hyponyms and hypernyms
    """
    alternatives = []
    if not word in word_senses:
        alternatives.append(word)
        return alternatives
    
    word_sense = wn.synset(word_senses[word])
    alternatives.extend(get_synonyms(word_sense))
    for hypernym in confirmed_hypernyms[word]:
        alternatives.extend(get_synonyms(wn.synset(hypernym)))
    for hyponym in confirmed_hyponyms[word]:
        alternatives.extend(get_synonyms(wn.synset(hyponym)))
    return alternatives


def enumerate_alternatives(sentence, word_senses, confirmed_hypernyms, confirmed_hyponyms):
    """
    Enumerate all of the sentences that can result
    by taking any combination of the alternates for each word in the sentence
    """
    words = word_tokenize(sentence.lower())
    # 2-D list
    alternatives_per_word = []
    for word in words:
        alternative = get_alternatives(word, word_senses, confirmed_hypernyms, confirmed_hyponyms)
        alternatives_per_word.append(alternatives)
    
    # combination of 2-D lists
    alternative_to_original = {}
    for word in list(itertools.product(*alternatives_per_word)):
        alt_sent = " ".join(words)
        alternative_to_original[alt_sent] = sentence
    return alternative_to_original

In [10]:
alternative_commands = {}
for command in commands:
    alternative_commands.update(enumerate_alternatives(command,
                                                        word_senses,
                                                        confirmed_hypernyms,
                                                        confirmed_hyponyms))

for alt_sent in alternative_commands:
    print("%s => %s" %(alt_sent, alternative_commands[alt_senst]))
print("Congratulations, you can now handle %d commands instead of just %d!" %(len(alternative_commands.keys()), len(commands)))

NameError: name 'confirmed_hypernyms' is not defined