In [68]:
from nltk.corpus import wordnet as wn
import json

In [69]:
# Add the parent directory to the module search path.
# NOTE(review): presumably so project-local modules can be imported; none are
# imported in the cells visible here -- confirm this is still needed.
import sys
sys.path.append('../')

In [78]:
# Read all 60 truism sets from the balanced-parameter JSON file.
# The loop in the categorisation cell indexes the result by truism id.
with open("../data/truism_data/balanced_param_data.json", "r") as truism_file:
    all_truisms = json.load(truism_file)

### Breaking down sets by the type of antonym switch that needs to take place

Categories are:
* one word to one word switch
* one word to multiple words switch
* multi-word to multi-word switch, but only one word needs to be flipped
* multi-word to one word switch
* multi-word to multi-word switch

*^-- original-property-statement to antonym-property-statement*

In [71]:
# Each bucket maps a truism id to its [property, antonym] pair.
one_word_to_one_word_dict = {}      # property and antonym are both a single word
one_word_to_multi_word_dict = {}    # single-word property, multi-word antonym
one_word_flip_dict = {}             # multi-word property sharing all but one word with the antonym
multi_word_to_one_word_dict = {}    # multi-word property, single-word antonym
multi_word_to_multi_word_dict = {}  # both multi-word, not a one-word flip

for truism, truism_entry in all_truisms.items():
    antonym_switch = truism_entry["antonym_switch"]
    parts_1 = antonym_switch[0].split(" ")
    parts_2 = antonym_switch[1].split(" ")
    shared_words = set(parts_1) & set(parts_2)

    # Pick the bucket this pair belongs to, then store it once.
    if len(parts_1) == 1:
        if len(parts_2) == 1:
            bucket = one_word_to_one_word_dict
        else:
            bucket = one_word_to_multi_word_dict
    elif len(shared_words) == len(parts_1) - 1:
        # NOTE: only the number of shared words is checked here; the two
        # phrases are not required to have the same length.
        bucket = one_word_flip_dict
    elif len(parts_2) == 1:
        bucket = multi_word_to_one_word_dict
    else:
        bucket = multi_word_to_multi_word_dict

    bucket[truism] = antonym_switch

# Every truism id is stored exactly once, so the counts are the bucket sizes.
one_word_to_one_word = len(one_word_to_one_word_dict)
one_word_to_multi_word = len(one_word_to_multi_word_dict)
one_word_flip = len(one_word_flip_dict)
multi_word_to_one_word = len(multi_word_to_one_word_dict)
multi_word_to_multi_word = len(multi_word_to_multi_word_dict)

print("one_word_to_one_word truism count: {}".format(one_word_to_one_word))
print("one_word_to_multi_word count: {}".format(one_word_to_multi_word))
print("one_word_flip count: {}".format(one_word_flip))
print("multi_word_to_one_word count: {}".format(multi_word_to_one_word))
print("multi_word_to_multi_word: {}".format(multi_word_to_multi_word))

one_word_to_one_word truism count: 27
one_word_to_multi_word count: 3
one_word_flip count: 16
multi_word_to_one_word count: 1
multi_word_to_multi_word: 13


### WordNet can only natively handle one word at a time, so here I'm looking at the 27 one_word_to_one_word pairs

In [72]:
# Truisms whose human-entered antonym is among the antonyms WordNet returns
found_in_wn = 0
found_in_wn_dict = {}

# Truisms for which WordNet returns antonyms, but none equals the human-entered one
found_ant = 0
found_ant_dict = {}

# Truisms for which WordNet returns no antonym at all
not_found = 0
not_found_dict = {}

for key in one_word_to_one_word_dict:
    antonym_switch = one_word_to_one_word_dict[key]

    # Gather every antonym WordNet lists for any lemma of any synset of the
    # property word. The full set is collected before classifying so that the
    # outcome does not depend on which antonym WordNet happens to yield first
    # (stopping at the first antonym could hide a later exact match).
    wordnet_antonyms = set()
    for synset in wn.synsets(antonym_switch[0]):
        for lemma in synset.lemmas():
            for antonym in lemma.antonyms():
                wordnet_antonyms.add(antonym.name())

    if antonym_switch[1] in wordnet_antonyms:
        found_in_wn += 1
        found_in_wn_dict[key] = antonym_switch
    elif wordnet_antonyms:
        found_ant += 1
        found_ant_dict[key] = antonym_switch
    else:
        not_found += 1
        not_found_dict[key] = antonym_switch

print("human inputed antonym found for {} truisms".format(found_in_wn))
print("wordnet provided possible antonyms for {} truisms".format(found_ant))
print("wordnet found no potential antonyms for {} truisms".format(not_found))

human inputed antonym found for 3 truisms
wordnet provided possible antonyms for 17 truisms
wordnet found no potential antonyms for 7 truisms


In [91]:
# Display the one-word pairs that were stored in not_found_dict,
# i.e. those for which the lookup loop found no WordNet antonym.
not_found_dict

{'0': ['transparent', 'opaque'],
 '4': ['fragile', 'sturdy'],
 '8': ['ductile', 'rigid'],
 '9': ['foldable', 'rigid'],
 '24': ['melt', 'freeze'],
 '33': ['detect', 'ignore'],
 '52': ['sinning', 'praying']}

### For the 17 pairs where WordNet returns at least one possible antonym, here are the candidate antonyms

* Results aren't terrible

In [79]:
# For every truism id recorded in found_ant_dict, list all antonyms WordNet
# offers for the property word next to the human-chosen antonym.
for key in found_ant_dict:
    antonym_switch = one_word_to_one_word_dict[key]
    original_word = antonym_switch[0]

    # dict.fromkeys de-duplicates the names while keeping first-seen order.
    possible_antonyms = dict.fromkeys(
        (
            antonym.name()
            for synset in wn.synsets(original_word)
            for lemma in synset.lemmas()
            for antonym in lemma.antonyms()
        ),
        1,
    )

    report = "Original Word: {}, Wordnet provided: {}, Ours: {}".format(
        original_word, list(possible_antonyms), antonym_switch[1]
    )
    print(report)
    print("-----------------")

Original Word: sharp, Wordnet provided: ['dull', 'flat', 'natural'], Ours: blunt
-----------------
Original Word: heavy, Wordnet provided: ['light'], Ours: lightweight
-----------------
Original Word: soft, Wordnet provided: ['hard', 'loud', 'unvoiced', 'hardened', 'forte'], Ours: coarse
-----------------
Original Word: consume, Wordnet provided: ['abstain'], Ours: avoid
-----------------
Original Word: penetrable, Wordnet provided: ['impenetrable'], Ours: tough
-----------------
Original Word: sinking, Wordnet provided: ['float'], Ours: floating
-----------------
Original Word: skilled, Wordnet provided: ['unskilled'], Ours: incompetent
-----------------
Original Word: relaxed, Wordnet provided: ['tense', 'stiffen', 'strain'], Ours: nervous
-----------------
Original Word: happy, Wordnet provided: ['unhappy'], Ours: sad
-----------------
Original Word: effective, Wordnet provided: ['ineffective'], Ours: unproductive
-----------------
Original Word: kind, Wordnet provided: ['unkind'], 

### There is one other case that Wordnet can help with, where there is one_word_flip, but the actual concept/property must be expressed in multiple words

* one_word_flip_dict

I would like to point out, though, that this concept of one_word_flip isn't one we would have knowledge of beforehand: we would first have to detect that a concept can be flipped by changing a single word, and then find the appropriate word. Maybe something with POS tags / parse trees can help us here.


In [89]:
# Truisms whose human-entered replacement word is among the antonyms WordNet returns
flip_found_in_wn = 0
flip_found_in_wn_dict = {}

# Truisms for which WordNet returns antonyms, but none equals the human-entered word
flip_found_ant = 0
flip_found_ant_dict = {}

# Truisms for which WordNet returns no antonym at all
flip_not_found = 0
flip_not_found_dict = {}

for key in one_word_flip_dict:
    antonym_switch = one_word_flip_dict[key]
    parts1 = antonym_switch[0].split(" ")
    parts2 = antonym_switch[1].split(" ")

    # First word that appears only in the property phrase, and first word that
    # appears only in the antonym phrase.
    word_that_needs_be_switched = [x for x in parts1 if x not in parts2][0]
    word_that_is_human_inputted = [x for x in parts2 if x not in parts1][0]

    # Gather every antonym WordNet lists for any lemma of any synset of the
    # word to switch. The full set is collected before classifying so that the
    # outcome does not depend on which antonym WordNet happens to yield first
    # (stopping at the first antonym could hide a later exact match).
    wordnet_antonyms = set()
    for synset in wn.synsets(word_that_needs_be_switched):
        for lemma in synset.lemmas():
            for antonym in lemma.antonyms():
                wordnet_antonyms.add(antonym.name())

    if word_that_is_human_inputted in wordnet_antonyms:
        flip_found_in_wn += 1
        flip_found_in_wn_dict[key] = antonym_switch
    elif wordnet_antonyms:
        flip_found_ant += 1
        flip_found_ant_dict[key] = antonym_switch
    else:
        flip_not_found += 1
        flip_not_found_dict[key] = antonym_switch

print("human inputed antonym found for {} truisms".format(flip_found_in_wn))
print("wordnet provided possible antonyms for {} truisms".format(flip_found_ant))
print("wordnet found no potential antonyms for {} truisms".format(flip_not_found))

human inputed antonym found for 2 truisms
wordnet provided possible antonyms for 8 truisms
wordnet found no potential antonyms for 6 truisms


In [90]:
# Display the one-word-flip pairs that were stored in flip_not_found_dict,
# i.e. those for which the lookup loop found no WordNet antonym.
flip_not_found_dict

{'7': ['water resistance', 'water absorption'],
 '15': ['safe to store', 'safe to dispose of'],
 '19': ['reflect light', 'absorb light'],
 '28': ['lengthy shadow', 'minature shadow'],
 '32': ['discern in water', 'conceal in water'],
 '56': ['proficient tennis player', 'inadequate tennis player']}

### For the 8 pairs where WordNet returns at least one possible antonym in the one-word-flip case, here are the candidate antonyms
* Again results aren't terrible

In [76]:
# For every truism id recorded in flip_found_ant_dict, list all antonyms
# WordNet offers for the word that differs, next to the human-chosen word.
for key in flip_found_ant_dict:
    antonym_switch = flip_found_ant_dict[key]
    parts1 = antonym_switch[0].split(" ")
    parts2 = antonym_switch[1].split(" ")

    # First word unique to each phrase.
    word_that_needs_be_switched = [w for w in parts1 if w not in parts2][0]
    word_that_is_human_inputted = [w for w in parts2 if w not in parts1][0]

    # dict.fromkeys de-duplicates the names while keeping first-seen order.
    possible_antonyms = dict.fromkeys(
        (
            antonym.name()
            for synset in wn.synsets(word_that_needs_be_switched)
            for lemma in synset.lemmas()
            for antonym in lemma.antonyms()
        ),
        1,
    )

    report = "Original Word: {}, Wordnet provided: {}, Ours: {}".format(
        word_that_needs_be_switched,
        list(possible_antonyms),
        word_that_is_human_inputted,
    )
    print(report)
    print("-----------------")

Original Word: expand, Wordnet provided: ['contract'], Ours: maintain
-----------------
Original Word: support, Wordnet provided: ['negate'], Ours: crumble
-----------------
Original Word: dangerous, Wordnet provided: ['safe'], Ours: innocuous
-----------------
Original Word: displace, Wordnet provided: ['hire'], Ours: leave
-----------------
Original Word: difficult, Wordnet provided: ['easy', 'manageable'], Ours: convenient
-----------------
Original Word: withstanding, Wordnet provided: ['surrender'], Ours: breaking
-----------------
Original Word: painful, Wordnet provided: ['painless'], Ours: pleasant
-----------------
Original Word: secure, Wordnet provided: ['unfasten', 'insecure'], Ours: unstable
-----------------


### Core function that you can use below to try out some words

In [85]:
def get_antonyms(word):
    """Return the antonyms WordNet lists for ``word``.

    Every synset of ``word`` is visited, and the antonyms of every lemma in
    those synsets are collected.

    Args:
        word: The word to look up in WordNet.

    Returns:
        A list of antonym lemma names, without duplicates, in the order they
        were first encountered. Empty if WordNet yields no antonyms.
    """
    antonyms = {}
    # Look up the argument itself; querying a module-level variable here would
    # make the result independent of the word the caller asked about.
    for synset in wn.synsets(word):
        for lemma in synset.lemmas():
            for antonym in lemma.antonyms():
                # dict keys give de-duplication with stable insertion order
                antonyms[antonym.name()] = 1
    return list(antonyms.keys())

In [88]:
# Try the helper on a single word; the returned list is shown as the cell output.
get_antonyms("reflect")

[]

In general I think it's clear that WordNet alone will not solve our problems: it can't natively handle multi-word cases, it doesn't always return any candidate antonyms for a word, and when it does return candidates they aren't always ones that capture the sense we are looking for.