### Imports

In [1]:
import json
import os
import pickle

import numpy as np
import pandas as pd
import requests
from nltk.corpus import wordnet as wn
from tqdm.notebook import tqdm

### 1. Process Seed Word Dictionaries

This section processes the seed word list published in:

- *Nicolas, G., Bai, X., & Fiske, S. T. (2021). Comprehensive stereotype content dictionaries using a semi‐automated method. In European Journal of Social Psychology (Vol. 51, Issue 1, pp. 178–196). Wiley. https://doi.org/10.1002/ejsp.2724* 

The list is provided as the file *finalwordlist R 050418.csv* in the paper's online supplement at https://osf.io/yx45f/. 

In [2]:
# Read the seed word list, keep only the adjectives and leave out the Religion dictionary.
seed_words = (
    pd.read_csv("finalwordlist R 050418.csv")
    .loc[lambda frame: frame["PoS"] == "ADJECTIVE"]
    .loc[lambda frame: frame["Dictionary"] != "Religion"]
)


# Add relevant WordNet information. 
def get_synset(row):
    """Return the name of the WordNet adjective synset picked by the row's 1-based ``sense``."""
    adjective_synsets = wn.synsets(row["term"], pos=wn.ADJ)
    selected = adjective_synsets[row["sense"] - 1]
    return selected.name()

def get_definition(row):
    """Return the WordNet definition of the adjective synset picked by the row's 1-based ``sense``."""
    adjective_synsets = wn.synsets(row["term"], pos=wn.ADJ)
    selected = adjective_synsets[row["sense"] - 1]
    return selected.definition()

def get_example(row):
    """Return the first WordNet example sentence for the row's adjective sense.

    The synset is selected with the row's ``term`` and its 1-based ``sense``.

    Returns:
        The first example sentence, or ``None`` when the sense index is out of
        range or the synset has no example sentences.

    Any other error (for instance a failing WordNet lookup) is raised instead
    of being silently turned into a missing example.
    """
    try:
        synset = wn.synsets(row["term"], pos=wn.ADJ)[row["sense"] - 1]
    except IndexError:
        # No synset for this sense number: report "no example" as before.
        return None

    examples = synset.examples()
    return examples[0] if examples else None
      
def get_antonyms(row):
    """Return the synset name of the first antonym found for the row's adjective sense.

    The synset is selected with the row's ``term`` and its 1-based ``sense``;
    its lemmas are scanned in order and the first antonym encountered wins.

    Returns:
        The name of the first antonym's synset, or ``None`` when none of the
        lemmas has an antonym.

    Raises:
        IndexError: if the sense number has no matching synset.

    Errors raised while reading lemmas or antonyms are no longer swallowed, so
    a failing lookup cannot masquerade as "this word has no antonym".
    """
    synset = wn.synsets(row["term"], pos=wn.ADJ)[row["sense"] - 1]
    antonym_synset_names = (
        antonym.synset().name()
        for lemma in synset.lemmas()
        for antonym in lemma.antonyms()
    )
    # Lazily stop at the first hit instead of collecting every antonym.
    return next(antonym_synset_names, None)
        
# Add one WordNet-derived column per helper; each helper is called once per row.
seed_words["Synset"] = seed_words.apply(get_synset, axis=1)
seed_words["Definition"] = seed_words.apply(get_definition, axis=1)
seed_words["Examples"] = seed_words.apply(get_example, axis=1)
seed_words["Antonyms"] = seed_words.apply(get_antonyms, axis=1)

In [3]:
# Adding sample sentences from Oxford Dictionaries for cases where there is no sample sentence on WordNet. 

# The API credentials are read from the environment so that no secret is stored
# in the notebook. The id/key pair that used to be hard-coded here should be
# treated as leaked and revoked.
app_id = os.environ.get("OXFORD_APP_ID")
app_key = os.environ.get("OXFORD_APP_KEY")
language = "en-gb"

if not (app_id and app_key):
    # Make the missing configuration visible instead of failing silently later on.
    print("OXFORD_APP_ID / OXFORD_APP_KEY are not set; Oxford Dictionaries requests are expected to fail.")

def get_examples_oxford(row):
    """Return the row's example sentence, asking Oxford Dictionaries when it is missing.

    If ``row["Examples"]`` already holds a value it is returned unchanged.
    Otherwise the lower-cased ``row["term"]`` is looked up through the Oxford
    Dictionaries entries endpoint (using the module-level ``language``,
    ``app_id`` and ``app_key``) and the first example of the first sense is
    returned.

    Returns:
        The example sentence, or ``None`` when the request fails, the response
        is not valid JSON, or the response does not contain an example at the
        expected position.
    """
    existing_example = row["Examples"]
    # pd.isna treats both None and NaN as missing, so the function also works
    # on a table whose missing values are NaN rather than None.
    if not pd.isna(existing_example):
        return existing_example

    try:
        word_id = row["term"]
        url = "https://od-api.oxforddictionaries.com:443/api/v2/entries/" + language + "/" + word_id.lower()
        # A timeout keeps a single stalled request from hanging the whole apply().
        response = requests.get(
            url,
            headers={"app_id": app_id, "app_key": app_key},
            timeout=10,
        )
        payload = json.loads(response.text)
        return payload["results"][0]["lexicalEntries"][0]["entries"][0]["senses"][0]["examples"][0]["text"]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # Network failure, non-JSON body, or no example at the expected path:
        # this lookup is best-effort, so report "no example".
        return None
    
# Fill the missing example sentences row by row (rows that already have one are kept as they are).
seed_words["Examples"] = seed_words.apply(get_examples_oxford, axis=1)

In [4]:
# Export the enriched seed words to CSV (with default settings the DataFrame index is written as the first column).

seed_words.to_csv("seed_words_raw.csv")

In [5]:
# Import the exported table: the first CSV column is read as the index,
# which is then replaced by a fresh 0..n-1 index.

seed_words = pd.read_csv("seed_words_raw.csv", index_col=0)
seed_words = seed_words.reset_index(drop=True)

In [10]:
# Show the seed word table (the notebook renders the last expression of a cell).
seed_words

Unnamed: 0,Dictionary,term,term preproc,PoS,sense,Dir,Synset,Definition,Examples,Antonyms
0,Sociability,sociable,sociable,ADJECTIVE,1,high,sociable.a.01,inclined to or conducive to companionship with...,a sociable occasion,unsociable.a.01
1,Sociability,unsociable,unsociable,ADJECTIVE,1,low,unsociable.a.01,not inclined to society or companionship,an unsociable nature...shy and reserved,sociable.a.01
2,Sociability,friendly,friendly,ADJECTIVE,1,high,friendly.a.01,characteristic of or befitting a friend,friendly advice,unfriendly.a.02
3,Sociability,friendly,friendly,ADJECTIVE,2,high,friendly.s.02,inclined to help or support; not antagonistic ...,a government friendly to our interests,
4,Sociability,unfriendly,unfriendly,ADJECTIVE,2,low,unfriendly.a.02,not disposed to friendship or friendliness,an unfriendly coldness of manner,friendly.a.01
...,...,...,...,...,...,...,...,...,...,...
341,Politics,progressive,progressive,ADJECTIVE,1,low,progressive.a.01,favoring or promoting progress,progressive schools,regressive.a.02
342,Politics,progressive,progressive,ADJECTIVE,2,low,progressive.s.02,favoring or promoting reform (often by governm...,a progressive decline in popularity,
343,Politics,narrow-minded,narrowminded,ADJECTIVE,2,high,narrow-minded.a.02,lacking tolerance or flexibility or breadth of...,a brilliant but narrow-minded judge,broad-minded.a.02
344,Politics,narrow-minded,narrowminded,ADJECTIVE,3,high,narrow-minded.s.03,rigidly adhering to a particular sect or its d...,it would be narrow-minded not to welcome these...,


### 2. Match Antonyms

In [25]:
def clean_and_match_antonyms(df):
    """Pair every "high" seed word with the "low" seed word that is its antonym.

    Args:
        df: DataFrame with the columns ``Dictionary``, ``term``, ``Dir``,
            ``Synset``, ``Definition``, ``Examples`` and ``Antonyms``.
            It is not modified.

    Returns:
        A new DataFrame with one row per matched pair and the columns
        ``Dictionary``, ``Term1``, ``Synset1``, ``Definition1``, ``Example1``,
        ``Synset2``, ``Term2``, ``Definition2``, ``Example2``. Columns ending
        in 1 come from the "high" row, columns ending in 2 from the "low" row.
        Pairs with any missing value are dropped, only the first pair per
        ``Synset1`` is kept, and the index is a contiguous 0..n-1 range.
    """
    # Rows without an antonym cannot take part in a match on either side.
    has_antonym = df["Antonyms"].notnull()

    high = df.loc[
        (df["Dir"] == "high") & has_antonym,
        ["Dictionary", "term", "Synset", "Definition", "Examples", "Antonyms"],
    ].rename(
        columns={
            "term": "Term1",
            "Synset": "Synset1",
            "Definition": "Definition1",
            "Examples": "Example1",
            # The antonym of a "high" row is the synset to look up among the "low" rows.
            "Antonyms": "Synset2",
        }
    )

    low = df.loc[
        (df["Dir"] == "low") & has_antonym,
        ["term", "Synset", "Definition", "Examples"],
    ].rename(
        columns={
            "term": "Term2",
            "Synset": "Synset2",
            "Definition": "Definition2",
            "Examples": "Example2",
        }
    )

    return (
        pd.merge(high, low, on=["Synset2"])
        .dropna()
        # Keeping the first row per Synset1 also removes fully identical rows.
        .drop_duplicates(subset=["Synset1"])
        # Reset last so that de-duplication leaves no gaps in the index.
        .reset_index(drop=True)
    )

In [26]:
# Match antonym pairs on the complete seed word table (all dictionaries in one pass).
all_matched = clean_and_match_antonyms(seed_words)

In [148]:
# Clean and match antonym dictionaries 

# NOTE(review): the per-dictionary inputs (`sociability_seed`, `ability_seed`, ...) are not
# defined in any visible cell, so this cell would raise NameError on Restart & Run All unless
# they come from elsewhere. They are rebuilt here from `seed_words`. The labels other than
# "Sociability" and "Politics" are assumed from the variable names; confirm them against the
# "Dictionary" column of the CSV (a wrong label raises KeyError).
seeds_by_dictionary = {label: rows for label, rows in seed_words.groupby("Dictionary")}

sociability_seed_matched = clean_and_match_antonyms(seeds_by_dictionary["Sociability"])
ability_seed_matched = clean_and_match_antonyms(seeds_by_dictionary["Ability"])
status_seed_matched = clean_and_match_antonyms(seeds_by_dictionary["Status"])
# The doubled "seed" in this name is kept because the length-check cell refers to it.
agency_seed_seed_matched = clean_and_match_antonyms(seeds_by_dictionary["Agency"])
morality_seed_matched = clean_and_match_antonyms(seeds_by_dictionary["Morality"])
politics_seed_matched = clean_and_match_antonyms(seeds_by_dictionary["Politics"])

In [162]:
# Report how many matched antonym pairs each dictionary produced.
# Every entry ends in an explicit "\n" followed by the literal line break of the
# triple-quoted string, so the printed lines are separated by blank lines.

print(f"""Sociability Length: {len(sociability_seed_matched)} \n
Ability Length: {len(ability_seed_matched)} \n
Status Length: {len(status_seed_matched)} \n
Agency Length: {len(agency_seed_seed_matched)} \n
Morality Length: {len(morality_seed_matched)} \n
Politics Length: {len(politics_seed_matched)}""")

Sociability Length: 14 

Ability Length: 13 

Status Length: 5 

Agency Length: 8 

Morality Length: 19 

Politics Length: 3


### 3. Create Files compatible with SensePOLAR

To be compatible with the SensePOLAR framework, the matched seed word list is converted into several text (`.txt`) and pickle (`.pkl`) files.

In [27]:
def create_antonym_text_files(df, output_dir="antonyms"):
    """Write the lookup files for a table of matched antonym pairs.

    Args:
        df: DataFrame with one antonym pair per row and the columns
            ``Synset1``, ``Synset2``, ``Term1``, ``Term2``, ``Example1``,
            ``Example2``, ``Definition1`` and ``Definition2``.
        output_dir: Directory the files are written to. It is created if it
            does not exist yet. Defaults to ``"antonyms"``.

    Four structures are built, each with one entry per row:

    * a dict mapping the pair key to ``{term1: [example1], term2: [example2]}``
      (``antonym_wordnet_example_sentences_readable_extended.txt``),
    * a list of ``[[term1, [example1]], [term2, [example2]]]``
      (``lookup_anto_example_dict``),
    * a list of ``[definition1, definition2]`` (``lookup_synset_definition``),
    * a list of pair keys (``lookup_synset_dict``).

    The pair key is the string form of the two-element list of synset names.
    All four are written as indented JSON ``.txt`` files; the three lists are
    additionally pickled to ``.pkl`` files of the same name.
    """
    example_sentences_readable = {}
    anto_example_lookup = []
    synset_definition_lookup = []
    synset_pair_lookup = []

    for entry in tqdm(df.index):
        # The key is deliberately a string such as "['a.a.01', 'b.a.01']", not a list.
        antonym_pair = str([str(df.at[entry, "Synset1"]), str(df.at[entry, "Synset2"])])

        term1 = df.at[entry, "Term1"]
        term2 = df.at[entry, "Term2"]
        example1 = df.at[entry, "Example1"]
        example2 = df.at[entry, "Example2"]

        example_sentences_readable[antonym_pair] = {term1: [example1], term2: [example2]}
        anto_example_lookup.append([[term1, [example1]], [term2, [example2]]])
        synset_definition_lookup.append([df.at[entry, "Definition1"], df.at[entry, "Definition2"]])
        synset_pair_lookup.append(antonym_pair)

    # open(..., "w") does not create missing directories, so make sure the target exists.
    os.makedirs(output_dir, exist_ok=True)

    text_outputs = {
        "antonym_wordnet_example_sentences_readable_extended.txt": example_sentences_readable,
        "lookup_anto_example_dict.txt": anto_example_lookup,
        "lookup_synset_definition.txt": synset_definition_lookup,
        "lookup_synset_dict.txt": synset_pair_lookup,
    }
    for filename, payload in text_outputs.items():
        with open(os.path.join(output_dir, filename), "w") as file:
            json.dump(payload, file, indent=4)

    pickle_outputs = {
        "lookup_anto_example_dict.pkl": anto_example_lookup,
        "lookup_synset_definition.pkl": synset_definition_lookup,
        "lookup_synset_dict.pkl": synset_pair_lookup,
    }
    for filename, payload in pickle_outputs.items():
        with open(os.path.join(output_dir, filename), "wb") as file:
            pickle.dump(payload, file)

In [28]:
# Write the text and pickle lookup files for all matched antonym pairs.
create_antonym_text_files(all_matched)

  0%|          | 0/58 [00:00<?, ?it/s]