In [106]:
from nltk.corpus import wordnet as wn
import json

In [227]:
def find_senses(word):
    """Interactively choose WordNet senses of ``word`` and collect their lemmas.

    Every synset returned by ``wn.synsets(word)`` is printed (name, definition,
    lemma names and, when available, its first example) and the user is asked
    what to do with it.  Only the first character of the stripped, lower-cased
    answer is significant:

    * ``y`` - keep the synset: its lemma names are added to the synonyms and
      the antonyms of those lemmas to the antonyms.
    * ``b`` - go back: store the *previous* synset, then ask again about the
      current one.
    * ``s`` - stop looking at further synsets.
    * anything else - skip the synset.

    Parameters
    ----------
    word : str
        The word to look up in WordNet.

    Returns
    -------
    dict
        ``{"synonyms": [...], "antonyms": [...]}`` with names in first-seen
        order and without duplicates.
    """
    print("********************************")
    print("This is the word {}".format(word))

    # The visited synsets are recorded in a module-level dict.  Create it on
    # first use so that a fresh kernel does not fail with a NameError.
    explored = globals().setdefault("synonym_senses_explored", {})

    # Dicts are used as ordered sets: keys keep insertion order, no duplicates.
    synonyms = {}
    antonyms = {}

    def store(synset):
        # Record every lemma of ``synset`` plus the antonyms of those lemmas.
        for lemma in synset.lemmas():
            synonyms[lemma.name()] = 1
            for antonym in lemma.antonyms():
                antonyms[antonym.name()] = 1

    def ask():
        # Compare on the first character only: substring matching would make
        # an answer such as "yes" trigger the "s" (stop) command.
        return input("Should I keep this lemma?").strip().lower()[:1]

    synsets = wn.synsets(word)
    for i, syn in enumerate(synsets):
        explored[syn.name()] = 1
        print("")
        print((syn.name(), syn.definition(), syn.lemma_names()))
        if syn.examples():
            print("Here's an example: {}".format(syn.examples()[0]))

        keep = ask()
        if keep == "s":
            break

        if keep == "b":
            if i > 0:
                # Store the synset shown just before this one (not the current
                # one, and never wrap around to the last synset).
                store(synsets[i - 1])
                print("stored previous synset")
            else:
                print("there is no previous synset to store")
            keep = ask()

        if keep == "y":
            store(syn)

    return {"synonyms": list(synonyms.keys()), "antonyms": list(antonyms.keys())}

In [228]:
def swap_senses(first_sense_dict, second_sense_dict):
    """Cross-populate two sense dicts that describe opposite words.

    Each word's synonyms are appended to the other word's antonyms and each
    word's antonyms to the other word's synonyms, skipping entries that are
    already present.  Both dicts are modified in place and also returned.

    Parameters
    ----------
    first_sense_dict, second_sense_dict : dict
        Dicts with list values under the keys ``"synonyms"`` and ``"antonyms"``.

    Returns
    -------
    tuple
        ``(first_sense_dict, second_sense_dict)`` - the same objects passed in.
    """
    def _append_missing(source, source_key, target, target_key):
        # Copy entries of source[source_key] that target[target_key] lacks.
        for entry in source[source_key]:
            if entry not in target[target_key]:
                target[target_key].append(entry)

    # Order matters: later steps see the entries appended by earlier ones.
    transfers = (
        (first_sense_dict, "synonyms", second_sense_dict, "antonyms"),
        (first_sense_dict, "antonyms", second_sense_dict, "synonyms"),
        (second_sense_dict, "synonyms", first_sense_dict, "antonyms"),
        (second_sense_dict, "antonyms", first_sense_dict, "synonyms"),
    )
    for source, source_key, target, target_key in transfers:
        _append_missing(source, source_key, target, target_key)

    return first_sense_dict, second_sense_dict

In [229]:
def check_cache_and_process(word, cache):
    """Return a sense dict for ``word``, reusing or extending ``cache``.

    ``cache`` maps a word to the list of sense dicts produced for it so far.

    * Word not cached: run ``find_senses`` and store the result as the first
      entry for that word.
    * Word cached: show the cached entries and ask whether to look the word up
      again.  An answer containing ``y`` runs ``find_senses`` and appends the
      new result.  Otherwise a cached entry is reused; when there are several,
      the user is asked for its index and is asked again until the answer is a
      valid index into the cached list.

    Parameters
    ----------
    word : str
        Word to resolve.
    cache : dict
        Mapping ``word -> list of sense dicts``; updated in place.

    Returns
    -------
    tuple
        ``(word_sense, cache)`` where ``cache`` is the same object passed in.
    """
    if word not in cache:
        word_sense = find_senses(word)
        cache[word] = [word_sense]
        return word_sense, cache

    print("######################")
    print("We already looked up this word: {}, {}".format(word, cache[word]))
    keep_going = input("Still look up this word: ")

    if "y" in keep_going:
        word_sense = find_senses(word)
        cache[word].append(word_sense)
    elif len(cache[word]) > 1:
        # A typo here used to raise ValueError/IndexError; ask again instead
        # of aborting the interactive session.
        while True:
            answer = input("Which sense would you like: ")
            try:
                word_sense = cache[word][int(answer)]
                break
            except (ValueError, IndexError):
                print("Please enter a sense index between 0 and {}".format(
                    len(cache[word]) - 1))
    else:
        word_sense = cache[word][0]

    return word_sense, cache

In [230]:
def find_senses_for_physical_data(param_dictionaries, cache, finished_keys=None):
    """Annotate every physical-data parameter dict with word senses.

    For each entry of ``param_dictionaries`` whose key is not yet in
    ``finished_keys`` the function resolves the senses of the words stored
    under ``"original_comparison"`` and ``"antonym_comparison"`` and of the
    two words in each value of ``"premise_switch"``.  Every pair is passed
    through ``swap_senses`` and the results are written back to the entry
    under ``"original_comparison_senses"``, ``"antonym_comparison_senses"``
    and ``"premise_switch_senses"``.

    The work is interactive and best-effort: if it is interrupted or an error
    occurs, a message is printed and the progress made so far is still
    returned, so the call can be repeated with the returned ``finished_keys``
    to resume.

    Parameters
    ----------
    param_dictionaries : dict
        Mapping of key -> parameter dict; updated in place.
    cache : dict
        Sense cache passed to ``check_cache_and_process``.
    finished_keys : list, optional
        Keys already processed; extended in place.  A new list is created when
        omitted (a shared mutable default would leak state between calls).

    Returns
    -------
    tuple
        ``(param_dictionaries, cache, finished_keys)``.
    """
    if finished_keys is None:
        finished_keys = []

    try:
        for key in param_dictionaries:
            if key in finished_keys:
                continue

            param_dictionary = param_dictionaries[key]
            original_comparison = param_dictionary["original_comparison"]
            antonym_comparison = param_dictionary["antonym_comparison"]
            premise_switch = param_dictionary["premise_switch"]

            original_comparison_senses, cache = check_cache_and_process(
                original_comparison, cache)
            antonym_comparison_senses, cache = check_cache_and_process(
                antonym_comparison, cache)
            original_comparison_senses, antonym_comparison_senses = swap_senses(
                original_comparison_senses, antonym_comparison_senses)

            premise_switch_senses = {}
            for key_2 in premise_switch:
                first_premise = premise_switch[key_2][0]
                second_premise = premise_switch[key_2][1]
                first_premise_senses, cache = check_cache_and_process(
                    first_premise, cache)
                second_premise_senses, cache = check_cache_and_process(
                    second_premise, cache)
                first_premise_senses, second_premise_senses = swap_senses(
                    first_premise_senses, second_premise_senses)
                premise_switch_senses[key_2] = {
                    first_premise: first_premise_senses,
                    second_premise: second_premise_senses,
                }

            param_dictionary["original_comparison_senses"] = original_comparison_senses
            param_dictionary["antonym_comparison_senses"] = antonym_comparison_senses
            param_dictionary["premise_switch_senses"] = premise_switch_senses

            param_dictionaries[key] = param_dictionary
            # Only mark the key as done once all of its words are resolved.
            finished_keys.append(key)
    except KeyboardInterrupt:
        print("Interrupted; returning the progress made so far.")
    except Exception as error:
        # Report the failure instead of swallowing it silently, but still hand
        # back the partial results.
        print("Stopped early because of {}: {}".format(type(error).__name__, error))

    return param_dictionaries, cache, finished_keys

In [210]:
# Load the physical-data parameter dictionaries from physical_data.json.
with open("physical_data.json") as f:
    physcial_data_params = json.load(f)

In [214]:
# Start with an empty sense cache and no finished keys, then run the
# interactive sense annotation over the physical-data parameters.
sense_cache = {}
finished_keys = []
physcial_data_params, sense_cache, finished_keys = find_senses_for_physical_data(physcial_data_params, 
                                                                                 sense_cache,
                                                                                 finished_keys)

********************************
This is the word smaller

('smaller.s.01', 'small or little relative to something else', ['smaller', 'littler'])
Should I keep this lemma?y

('small.a.01', 'limited or below average in number or quantity or magnitude or extent', ['small', 'little'])
Here's an example: a little dining room
Should I keep this lemma?y

('minor.s.10', 'limited in size or scope', ['minor', 'modest', 'small', 'small-scale', 'pocket-size', 'pocket-sized'])
Here's an example: a small business
Should I keep this lemma?y

('little.s.03', '(of children and animals) young, immature', ['little', 'small'])
Here's an example: what a big little boy you are
Should I keep this lemma?n

('small.s.04', 'slight or limited; especially in degree or intensity or scope', ['small'])
Here's an example: a series of death struggles with small time in between
Should I keep this lemma?n

('humble.s.01', 'low or inferior in station or quality', ['humble', 'low', 'lowly', 'modest', 'small'])
Here's an 

Should I keep this lemma?y

('faint.s.04', 'weak and likely to lose consciousness', ['faint', 'light', 'swooning', 'light-headed', 'lightheaded'])
Here's an example: suddenly felt faint from the pain
Should I keep this lemma?n

('light.s.16', 'very thin and insubstantial', ['light'])
Here's an example: thin paper
Should I keep this lemma?y

('abstemious.s.02', 'marked by temperance in indulgence', ['abstemious', 'light'])
Here's an example: abstemious with the use of adverbs
Should I keep this lemma?n

('light.s.18', 'less than the correct or legal or full amount often deliberately so', ['light', 'scant', 'short'])
Here's an example: a light pound
Should I keep this lemma?n

('light.s.19', 'having little importance', ['light'])
Here's an example: losing his job was no light matter
Should I keep this lemma?n

('light.s.20', 'intended primarily as entertainment; not serious or profound', ['light'])
Here's an example: light verse
Should I keep this lemma?n

('idle.s.04', 'silly or trivial

Should I keep this lemma?n
********************************
This is the word airier

('aired.s.01', 'open to or abounding in fresh air', ['aired', 'airy'])
Here's an example: airy rooms
Should I keep this lemma?y

('airy.s.02', 'not practical or realizable; speculative', ['airy', 'impractical', 'visionary', 'Laputan', 'windy'])
Here's an example: airy theories about socioeconomic improvement
Should I keep this lemma?n

('airy.s.03', 'having little or no perceptible weight; so light as to resemble air', ['airy'])
Here's an example: airy gauze curtains
Should I keep this lemma?y

('aeriform.s.02', 'characterized by lightness and insubstantiality; as impalpable or intangible as air; - Thomas Carlyle', ['aeriform', 'aerial', 'airy', 'aery', 'ethereal'])
Here's an example: figures light and aeriform come unlooked for and melt away
Should I keep this lemma?n
######################
We already looked up this word: should, [{'synonyms': [], 'antonyms': []}]
Still look up this word: n
##########

Should I keep this lemma?n

('hot.s.13', 'newest or most recent', ['hot', 'red-hot'])
Here's an example: news hot off the press
Should I keep this lemma?n

('hot.s.14', 'having or bringing unusually good luck', ['hot'])
Here's an example: hot at craps
Should I keep this lemma?n

('hot.s.15', 'very good; often used in the negative', ['hot'])
Here's an example: he's hot at math but not so hot at history
Should I keep this lemma?n

('hot.s.16', 'newly made', ['hot'])
Here's an example: a hot scent
Should I keep this lemma?n

('hot.s.17', 'having or showing great eagerness or enthusiasm', ['hot'])
Here's an example: hot for travel
Should I keep this lemma?n

('hot.s.18', 'of a seeker; very near to the object sought', ['hot'])
Here's an example: you are hot
Should I keep this lemma?n

('hot.s.19', 'having or dealing with dangerously high levels of radioactivity', ['hot'])
Here's an example: hot fuel rods
Should I keep this lemma?n

('hot.s.20', 'charged or energized with electricity', ['hot

In [215]:
# Write the sense-annotated physical parameters to physical_data_params_wn as JSON.
with open("physical_data_params_wn", "w") as f:
    json.dump(physcial_data_params, f)
# Debug aid (disabled): print(physcial_data_params)

In [219]:
# Write the sense cache to sense_cache_wn as JSON.
with open("sense_cache_wn", "w") as f:
    json.dump(sense_cache, f)

In [220]:
# Load the physical-data sentences from physical_data_sentences.json.
with open("physical_data_sentences.json") as f:
    physical_data_sentences = json.load(f)

In [221]:
# Give every sentence entry a fresh "mask" dict built from the parameter dict
# stored under the same key: each comparison word maps to its sense dict, and
# the premise-switch senses are stored under "premise_switch_senses".
for key in physical_data_sentences:
    physical_data_sentences[key]["mask"] = {}
    params = physcial_data_params[key]
    physical_data_sentences[key]["mask"][params["original_comparison"]] = params["original_comparison_senses"]
    physical_data_sentences[key]["mask"][params["antonym_comparison"]] = params["antonym_comparison_senses"]
    physical_data_sentences[key]["mask"]["premise_switch_senses"] = params["premise_switch_senses"]

In [222]:
# Write the masked physical-data sentences to physical_data_sentences_wn as JSON.
with open("physical_data_sentences_wn", "w") as f:
    json.dump(physical_data_sentences, f)

In [241]:
def find_senses_for_material_social_data(param_dictionaries, cache, finished_keys=None):
    """Annotate every material/social parameter dict with word senses.

    For each entry of ``param_dictionaries`` whose key is not yet in
    ``finished_keys`` the function resolves the senses of the two words stored
    under ``"antonym_switch"`` (index 0 and index 1) and of the two words in
    each value of ``"premise_switch"``.  Every pair is passed through
    ``swap_senses`` and the results are written back to the entry under
    ``"original_comparison_senses"``, ``"antonym_comparison_senses"`` and
    ``"premise_switch_senses"``.

    The work is interactive and best-effort: if it is interrupted or an error
    occurs, a message is printed and the progress made so far is still
    returned, so the call can be repeated with the returned ``finished_keys``
    to resume.

    Parameters
    ----------
    param_dictionaries : dict
        Mapping of key -> parameter dict; updated in place.
    cache : dict
        Sense cache passed to ``check_cache_and_process``.
    finished_keys : list, optional
        Keys already processed; extended in place.  A new list is created when
        omitted (a shared mutable default would leak state between calls).

    Returns
    -------
    tuple
        ``(param_dictionaries, cache, finished_keys)``.
    """
    if finished_keys is None:
        finished_keys = []

    try:
        for key in param_dictionaries:
            if key in finished_keys:
                continue

            param_dictionary = param_dictionaries[key]
            original_comparison = param_dictionary["antonym_switch"][0]
            antonym_comparison = param_dictionary["antonym_switch"][1]
            premise_switch = param_dictionary["premise_switch"]

            original_comparison_senses, cache = check_cache_and_process(
                original_comparison, cache)
            antonym_comparison_senses, cache = check_cache_and_process(
                antonym_comparison, cache)
            original_comparison_senses, antonym_comparison_senses = swap_senses(
                original_comparison_senses, antonym_comparison_senses)

            premise_switch_senses = {}
            for key_2 in premise_switch:
                first_premise = premise_switch[key_2][0]
                second_premise = premise_switch[key_2][1]
                first_premise_senses, cache = check_cache_and_process(
                    first_premise, cache)
                second_premise_senses, cache = check_cache_and_process(
                    second_premise, cache)
                first_premise_senses, second_premise_senses = swap_senses(
                    first_premise_senses, second_premise_senses)
                premise_switch_senses[key_2] = {
                    first_premise: first_premise_senses,
                    second_premise: second_premise_senses,
                }

            param_dictionary["original_comparison_senses"] = original_comparison_senses
            param_dictionary["antonym_comparison_senses"] = antonym_comparison_senses
            param_dictionary["premise_switch_senses"] = premise_switch_senses

            param_dictionaries[key] = param_dictionary
            # Only mark the key as done once all of its words are resolved.
            finished_keys.append(key)
    except KeyboardInterrupt:
        print("Interrupted; returning the progress made so far.")
    except Exception as error:
        # Report the failure instead of swallowing it silently, but still hand
        # back the partial results.
        print("Stopped early because of {}: {}".format(type(error).__name__, error))

    return param_dictionaries, cache, finished_keys

In [251]:
# Load the material-data parameter dictionaries from material_data.json.
with open("material_data.json") as f:
    material_data_params = json.load(f)

In [237]:
# Reload the previously saved sense cache from sense_cache_wn.
with open("sense_cache_wn") as f:
    sense_cache = json.load(f)

In [242]:
# Keep an independent snapshot of the cache.  A plain assignment would only
# alias the same dict, and swap_senses mutates cached sense dicts in place, so
# the "copy" would change together with sense_cache.  The cache was just read
# from JSON, so a JSON round-trip is a safe deep copy.
# NOTE(review): the name `copy` would shadow the stdlib `copy` module if it
# were imported; it is kept because other cells may refer to it.
copy = json.loads(json.dumps(sense_cache))

In [261]:
# Reset the finished keys and run the interactive sense annotation over the
# material-data parameters, reusing the current sense cache.
finished_keys = []
material_data_params, sense_cache, finished_keys = find_senses_for_material_social_data(material_data_params,
                                                                                  sense_cache,
                                                                                  finished_keys)

######################
We already looked up this word: transparent, [{'synonyms': ['crystalline', 'crystal_clear', 'limpid', 'lucid', 'pellucid', 'transparent', 'diaphanous', 'filmy', 'gauzy', 'gauze-like', 'gossamer', 'see-through', 'sheer', 'vaporous', 'vapourous', 'cobwebby', 'clear'], 'antonyms': ['opaque']}]
Still look up this word: n
######################
We already looked up this word: opaque, [{'synonyms': ['opaque'], 'antonyms': ['clear', 'crystalline', 'crystal_clear', 'limpid', 'lucid', 'pellucid', 'transparent', 'diaphanous', 'filmy', 'gauzy', 'gauze-like', 'gossamer', 'see-through', 'sheer', 'vaporous', 'vapourous', 'cobwebby']}]
Still look up this word: n
######################
We already looked up this word: more, [{'synonyms': ['more', 'more_than', 'to_a_greater_extent'], 'antonyms': ['less', 'fewer', 'to_a_lesser_extent']}]
Still look up this word: n
######################
We already looked up this word: less, [{'synonyms': ['less', 'to_a_lesser_extent', 'fewer'], 'an

In [262]:
# Write the sense-annotated material parameters to material_data_params_wn as JSON.
with open("material_data_params_wn", "w") as f:
    json.dump(material_data_params, f)

In [264]:
# Overwrite sense_cache_wn with the current sense cache.
with open("sense_cache_wn", "w") as f:
    json.dump(sense_cache, f)

In [265]:
# Load the material-data sentences from material_data_sentences.json.
with open("material_data_sentences.json") as f:
    material_data_sentences = json.load(f)

In [266]:
# Give every sentence entry a fresh "mask" dict built from the parameter dict
# stored under the same key: the two "antonym_switch" words map to their sense
# dicts, and the premise-switch senses are stored under "premise_switch_senses".
for key in material_data_sentences:
    material_data_sentences[key]["mask"] = {}
    params = material_data_params[key]
    material_data_sentences[key]["mask"][params["antonym_switch"][0]] = params["original_comparison_senses"]
    material_data_sentences[key]["mask"][params["antonym_switch"][1]] = params["antonym_comparison_senses"]
    material_data_sentences[key]["mask"]["premise_switch_senses"] = params["premise_switch_senses"]

In [267]:
# Write the masked material-data sentences to material_data_sentences_wn as JSON.
with open("material_data_sentences_wn", "w") as f:
    json.dump(material_data_sentences, f)

In [268]:
# Load the social-data parameter dictionaries from social_data.json.
with open("social_data.json") as f:
    social_data_params = json.load(f)

In [273]:
# Reset the finished keys and run the interactive sense annotation over the
# social-data parameters, reusing the current sense cache.
finished_keys = []
social_data_params, sense_cache, finished_keys = find_senses_for_material_social_data(social_data_params,
                                                                                      sense_cache,
                                                                                      finished_keys)

######################
We already looked up this word: skilled, [{'synonyms': ['skilled', 'competent'], 'antonyms': ['unskilled', 'incompetent', 'incompetent_person', 'unqualified', 'bungling', 'clumsy', 'fumbling', 'incapable', 'unequal_to']}]
Still look up this word: n
######################
We already looked up this word: incompetent, [{'synonyms': ['incompetent', 'incompetent_person', 'unqualified', 'bungling', 'clumsy', 'fumbling', 'unskilled', 'incapable', 'unequal_to'], 'antonyms': ['competent', 'skilled']}]
Still look up this word: n
######################
We already looked up this word: more, [{'synonyms': ['more', 'more_than', 'to_a_greater_extent'], 'antonyms': ['less', 'fewer', 'to_a_lesser_extent']}]
Still look up this word: n
######################
We already looked up this word: less, [{'synonyms': ['less', 'to_a_lesser_extent', 'fewer'], 'antonyms': ['more', 'more_than', 'to_a_greater_extent']}]
Still look up this word: n
######################
We already looked up this

Still look up this word: n
######################
We already looked up this word: less, [{'synonyms': ['less', 'to_a_lesser_extent', 'fewer'], 'antonyms': ['more', 'more_than', 'to_a_greater_extent']}]
Still look up this word: n


In [276]:
# Write the sense-annotated social parameters to social_data_params_wn as JSON.
with open("social_data_params_wn", "w") as f:
    json.dump(social_data_params, f)

In [274]:
# Overwrite sense_cache_wn with the current sense cache.
with open("sense_cache_wn", "w") as f:
    json.dump(sense_cache, f)

In [275]:
# Load the social-data sentences from social_data_sentences.json.
with open("social_data_sentences.json") as f:
    social_data_sentences = json.load(f)

In [277]:
# Give every sentence entry a fresh "mask" dict built from the parameter dict
# stored under the same key: the two "antonym_switch" words map to their sense
# dicts, and the premise-switch senses are stored under "premise_switch_senses".
for key in social_data_sentences:
    social_data_sentences[key]["mask"] = {}
    params = social_data_params[key]
    social_data_sentences[key]["mask"][params["antonym_switch"][0]] = params["original_comparison_senses"]
    social_data_sentences[key]["mask"][params["antonym_switch"][1]] = params["antonym_comparison_senses"]
    social_data_sentences[key]["mask"]["premise_switch_senses"] = params["premise_switch_senses"]

In [278]:
# Write the masked social-data sentences to social_data_sentences_wn as JSON.
with open("social_data_sentences_wn", "w") as f:
    json.dump(social_data_sentences, f)