### Imports

In [1]:
import json
import os
import pickle

import numpy as np
import pandas as pd
import requests
from nltk.corpus import wordnet as wn
from tqdm.notebook import tqdm

### 1. Process Seed Word Dictionaries

Processing the seed word list from: 

- *Nicolas, G., Bai, X., & Fiske, S. T. (2021). Comprehensive stereotype content dictionaries using a semi‐automated method. In European Journal of Social Psychology (Vol. 51, Issue 1, pp. 178–196). Wiley. https://doi.org/10.1002/ejsp.2724* 

which is included in the file *finalwordlist R 050418.csv* in the online supplement found under https://osf.io/yx45f/. 

In [2]:
# Read the seed word list and keep only adjectives outside the Religion dictionary.
seed_words = pd.read_csv("finalwordlist R 050418.csv")
keep_rows = (seed_words["PoS"] == "ADJECTIVE") & (seed_words["Dictionary"] != "Religion")

# Take an explicit copy: new columns are assigned to this frame further down,
# and assigning to a boolean-filtered slice triggers pandas' SettingWithCopyWarning.
seed_words = seed_words.loc[keep_rows].copy()


# Add relevant WordNet information. 
def get_synset(row):
    """Return the WordNet name of the adjective synset selected by a seed-word row.

    The synset is the one at position ``row["sense"] - 1`` in the list of
    adjective synsets WordNet returns for ``row["term"]``.
    """
    adjective_synsets = wn.synsets(row["term"], pos=wn.ADJ)
    selected = adjective_synsets[row["sense"] - 1]
    return selected.name()

def get_definition(row):
    """Return the WordNet definition of the adjective synset selected by a seed-word row.

    The synset is the one at position ``row["sense"] - 1`` in the list of
    adjective synsets WordNet returns for ``row["term"]``.
    """
    adjective_synsets = wn.synsets(row["term"], pos=wn.ADJ)
    selected = adjective_synsets[row["sense"] - 1]
    return selected.definition()

def get_example(row):
    """Return the first WordNet example sentence for a seed-word row, or None.

    The synset is the one at position ``row["sense"] - 1`` in the list of
    adjective synsets WordNet returns for ``row["term"]``.

    Returns
    -------
    str or None
        The first example of the selected synset; None when the synset has no
        examples or when the requested sense does not exist.

    Only these two "nothing to return" situations are mapped to None. Any
    other failure (for instance an unavailable corpus) is raised instead of
    being silently swallowed.
    """
    try:
        synset = wn.synsets(row["term"], pos=wn.ADJ)[row["sense"] - 1]
    except IndexError:
        # No synset at the requested sense position.
        return None

    examples = synset.examples()
    return examples[0] if examples else None
      
def get_antonyms(row):
    """Return the synset name of the first WordNet antonym for a seed-word row, or None.

    The synset is the one at position ``row["sense"] - 1`` in the list of
    adjective synsets WordNet returns for ``row["term"]``. Its lemmas are
    scanned in order and the first antonym found wins.

    Returns
    -------
    str or None
        Name of the synset of the first antonym lemma, or None when none of
        the synset's lemmas has an antonym.

    Raises
    ------
    IndexError
        If the requested sense does not exist for the term.
    """
    synset = wn.synsets(row["term"], pos=wn.ADJ)[row["sense"] - 1]

    for lemma in synset.lemmas():
        antonym_lemmas = lemma.antonyms()
        if antonym_lemmas:
            # Only the first antonym is ever used, so stop at the first hit.
            return antonym_lemmas[0].synset().name()

    return None
        
# Derive the WordNet columns; axis=1 hands each row to the helper directly.
seed_words["Synset"] = seed_words.apply(get_synset, axis=1)
seed_words["Definition"] = seed_words.apply(get_definition, axis=1)
seed_words["Examples"] = seed_words.apply(get_example, axis=1)
seed_words["Antonyms"] = seed_words.apply(get_antonyms, axis=1)

In [3]:
# Adding sample sentences from Oxford Dictionaries for cases where there is no sample sentence on WordNet. 

# Oxford Dictionaries API credentials are read from the environment so that no
# secret is stored in the notebook. Set OXFORD_APP_ID and OXFORD_APP_KEY before
# starting the kernel.
# NOTE(review): the id/key that used to be committed here should be treated as
# exposed and revoked with the API provider.
app_id = os.environ.get("OXFORD_APP_ID", "")
app_key = os.environ.get("OXFORD_APP_KEY", "")
language = "en-gb"

if not (app_id and app_key):
    print("OXFORD_APP_ID / OXFORD_APP_KEY are not set; Oxford example lookups will not return sentences.")

def get_examples_oxford(row):
    """Return the row's example sentence, querying the Oxford Dictionaries API when it is missing.

    Parameters
    ----------
    row : mapping
        Needs the keys "Examples" (existing sentence or a missing value) and
        "term" (the word to look up, lower-cased for the request).

    Returns
    -------
    str or None
        The existing example when present. Otherwise the text of the first
        example of the first sense in the API response, or None when the
        request fails or the response does not contain such an example.

    Notes
    -----
    Uses the module-level ``app_id``, ``app_key`` and ``language`` settings.
    Missing examples are detected with ``pd.isna`` so that NaN (e.g. after a
    CSV round-trip) is treated like None. The lookup stays best-effort, but
    only network, decoding and response-shape errors are mapped to None.
    """
    existing = row["Examples"]
    if not pd.isna(existing):
        return existing

    url = "https://od-api.oxforddictionaries.com:443/api/v2/entries/" + language + "/" + row["term"].lower()
    try:
        # The timeout keeps one stalled request from blocking the whole apply().
        response = requests.get(url, headers={"app_id": app_id, "app_key": app_key}, timeout=10)
        payload = json.loads(response.text)
        return payload["results"][0]["lexicalEntries"][0]["entries"][0]["senses"][0]["examples"][0]["text"]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # Request failed, body was not JSON, or no example at the expected path.
        return None
    
# Fill in example sentences that are still missing via the Oxford lookup helper.
seed_words["Examples"] = seed_words.apply(get_examples_oxford, axis=1)

# Save the result so the API does not have to be queried again (API limits).
seed_words.to_csv(path_or_buf="seed_words_raw.csv")

In [30]:
# Reload the cached seed words; the saved index column is read and then
# replaced by a fresh 0..n-1 index.
seed_words = (
    pd.read_csv("seed_words_raw.csv", index_col=0)
    .reset_index(drop=True)
)

In [36]:
def check_sample_sentence(row):
    """Tell whether the row's term occurs as a space-separated token of its example sentence.

    Parameters
    ----------
    row : mapping
        Needs the keys "term" and "Examples".

    Returns
    -------
    bool or None
        True when ``row["term"]`` equals one of the pieces obtained by
        splitting ``row["Examples"]`` on single spaces, False when it does
        not, and None when the example is not a string (e.g. None or NaN).

    The comparison is exact: a token with attached punctuation or different
    capitalisation does not match.
    """
    sentence = row["Examples"]
    if not isinstance(sentence, str):
        # Missing example: nothing to check.
        return None
    return row["term"] in sentence.split(" ")

In [37]:
# Flag, per row, whether the term appears verbatim in its example sentence.
seed_words["Sentence_Check"] = seed_words.apply(check_sample_sentence, axis=1)

In [53]:
# Write the current seed word table back to the cache file (avoids re-querying the API).
seed_words.to_csv(path_or_buf="seed_words_raw.csv")

In [137]:
# Load the semicolon-separated seed word file; its first column becomes the index.
seed_words_manual = pd.read_csv(
    "seed_words_manual_final.csv",
    sep=";",
    index_col=0,
)

### 2. Match Antonyms

In [87]:
def clean_and_match_antonyms(df):
    """Pair every "high" seed word with the "low" seed word that is its recorded antonym.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns Dictionary, Dir, term, Synset, Definition, Examples
        and Antonyms.

    Returns
    -------
    pandas.DataFrame
        Columns Dictionary, Term1, Synset1, Definition1, Example1, Synset2,
        Term2, Definition2, Example2. A pair is formed when the Antonyms value
        of a "high" row equals the Synset of a "low" row. Pairs containing any
        missing value are dropped, as are exact duplicates and all but the
        first pair per Synset1. The index is renumbered before the duplicates
        are removed, so it can contain gaps.
    """
    antonym_known = df["Antonyms"].notnull()

    # "High" side: its Antonyms column becomes the join key Synset2.
    high_side = df.loc[
        (df["Dir"] == "high") & antonym_known,
        ["Dictionary", "term", "Synset", "Definition", "Examples", "Antonyms"],
    ].rename(
        columns={
            "term": "Term1",
            "Synset": "Synset1",
            "Definition": "Definition1",
            "Examples": "Example1",
            "Antonyms": "Synset2",
        }
    )

    # "Low" side: its own Synset is the join key Synset2.
    low_side = df.loc[
        (df["Dir"] == "low") & antonym_known,
        ["term", "Synset", "Definition", "Examples"],
    ].rename(
        columns={
            "term": "Term2",
            "Synset": "Synset2",
            "Definition": "Definition2",
            "Examples": "Example2",
        }
    )

    return (
        high_side.merge(low_side, on=["Synset2"])
        .dropna()
        .reset_index(drop=True)
        .drop_duplicates()
        .drop_duplicates(subset=["Synset1"])
    )

In [142]:
# Match high/low seed words, renumber the resulting pairs from zero and save them.
all_matched = clean_and_match_antonyms(seed_words_manual)
final_dimensions = all_matched.reset_index(drop=True)
final_dimensions.to_csv(path_or_buf="final_dimensions.csv")

In [2]:
# Load the semicolon-separated dimensions file; its first column becomes the index.
final_dimensions_manual = pd.read_csv(
    "dimensions_matched_manual.csv",
    sep=";",
    index_col=0,
)

In [4]:
# Bare last expression: shows the dimensions table as the cell's output.
final_dimensions_manual

Unnamed: 0,Dictionary,Term1,Synset1,Definition1,Example1,Synset2,Term2,Definition2,Example2
0,Sociability,sociable,sociable.a.01,inclined to or conducive to companionship with...,a sociable occasion,unsociable.a.01,unsociable,not inclined to society or companionship,an unsociable nature shy and reserved
1,Sociability,friendly,friendly.a.01,characteristic of or befitting a friend,friendly advice,unfriendly.a.02,unfriendly,not disposed to friendship or friendliness,an unfriendly coldness of manner
2,Sociability,friendly,friendly.s.02,inclined to help or support and not antagonist...,a government friendly to our interests,unfriendly.s.03,unfriendly,not friendly,an unfriendly act of aggression
3,Sociability,likable,sympathetic.a.04,evoking empathic or sympathetic feelings,this is the person who is very relational and ...,unsympathetic.a.02,unlikable,tending to evoke antipathetic feelings,all the characters were peculiarly unlikable
4,Sociability,likable,likable.s.02,easy to like and agreeable,an attractive and likable young person,unlikable.s.02,unlikable,difficult or impossible to like,a disagreeable and unlikable old person
...,...,...,...,...,...,...,...,...,...
106,Politics,conventional,conventional.a.04,unimaginative and conformist,conventional bourgeois lives,unconventional.a.02,unconventional,not conventional or conformist,unconventional life styles
107,Politics,conventional,conventional.s.06,in accord with or being a tradition or practic...,a conventional church wedding with the bride i...,modern.s.05,modern,used of a living language and being the curren...,modern English
108,Politics,conservative,conservative.a.01,resistant to change,they were very conservative in their outlook,liberal.a.03,liberal,tolerant of change and not bound by authoritar...,they have liberal views on divorce
109,Politics,conservative,conservative.s.02,having social or political views favoring cons...,they were very conservative in their outlook,liberal.s.02,liberal,having political or social views favoring refo...,they have liberal views on divorce


In [5]:
# Report the number of dimensions per dictionary, followed by the overall total.
dictionary_count_lines = [
    f"# of Dimensions {name}: {int((final_dimensions_manual['Dictionary'] == name).sum())}"
    for name in ("Sociability", "Ability", "Status", "Agency", "Morality", "Politics")
]

print(
    " \n\n".join(dictionary_count_lines)
    + "\n______________________________________\n\n"
    + f"Total # of Dimensions: {len(final_dimensions_manual)}"
)

# of Dimensions Sociability: 25 

# of Dimensions Ability: 24 

# of Dimensions Status: 8 

# of Dimensions Agency: 20 

# of Dimensions Morality: 26 

# of Dimensions Politics: 8
______________________________________

Total # of Dimensions: 111


### 3. Create Files compatible with SensePOLAR

To be compatible with the SensePOLAR framework, the matched seed word pairs are converted into several text and pickle (`.pkl`) files.

In [152]:
def create_antonym_text_files(df, output_dir="antonyms"):
    """Write the antonym-pair lookup files (JSON text and pickle) for a table of matched dimensions.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per antonym pair, with the columns Synset1, Synset2, Term1,
        Term2, Example1, Example2, Definition1 and Definition2.
    output_dir : str, optional
        Directory that receives the files. It is created when it does not
        exist yet. Defaults to "antonyms", the location that was previously
        hard-coded.

    Files written to ``output_dir``
    -------------------------------
    antonym_wordnet_example_sentences_readable_extended.txt
        JSON object mapping the string form of ``[Synset1, Synset2]`` to
        ``{Term1: [Example1], Term2: [Example2]}``.
    lookup_anto_example_dict.txt / .pkl
        List of ``[[Term1, [Example1]], [Term2, [Example2]]]`` per pair.
    lookup_synset_definition.txt / .pkl
        List of ``[Definition1, Definition2]`` per pair.
    lookup_synset_dict.txt / .pkl
        List of the string form of ``[Synset1, Synset2]`` per pair.

    Returns
    -------
    None
    """
    # Without this, every open() below fails on a fresh checkout that has no output folder.
    os.makedirs(output_dir, exist_ok=True)

    example_sentences_readable = {}
    lookup_anto_example = []
    lookup_synset_definition = []
    lookup_synset = []

    for entry in tqdm(df.index):
        # The pair is stored as the *string* representation of the two-element list.
        antonym_pair = str([str(df.at[entry, "Synset1"]), str(df.at[entry, "Synset2"])])

        term1 = df.at[entry, "Term1"]
        term2 = df.at[entry, "Term2"]
        example1 = df.at[entry, "Example1"]
        example2 = df.at[entry, "Example2"]

        example_sentences_readable[antonym_pair] = {term1: [example1], term2: [example2]}
        lookup_anto_example.append([[term1, [example1]], [term2, [example2]]])
        lookup_synset_definition.append([df.at[entry, "Definition1"], df.at[entry, "Definition2"]])
        lookup_synset.append(antonym_pair)

    # Human-readable JSON text files.
    text_outputs = {
        "antonym_wordnet_example_sentences_readable_extended.txt": example_sentences_readable,
        "lookup_anto_example_dict.txt": lookup_anto_example,
        "lookup_synset_definition.txt": lookup_synset_definition,
        "lookup_synset_dict.txt": lookup_synset,
    }
    for filename, payload in text_outputs.items():
        with open(os.path.join(output_dir, filename), "w") as file:
            file.write(json.dumps(payload, indent=4))

    # Pickled copies of the three lookup lists.
    pickle_outputs = {
        "lookup_anto_example_dict.pkl": lookup_anto_example,
        "lookup_synset_definition.pkl": lookup_synset_definition,
        "lookup_synset_dict.pkl": lookup_synset,
    }
    for filename, payload in pickle_outputs.items():
        with open(os.path.join(output_dir, filename), "wb") as file:
            pickle.dump(payload, file)

In [153]:
# Write the antonym lookup text and pickle files for the dimensions table loaded above.
create_antonym_text_files(final_dimensions_manual)

  0%|          | 0/111 [00:00<?, ?it/s]