### Imports

In [7]:
import json
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
from nltk.corpus import wordnet as wn
from tqdm.notebook import tqdm

### 1. Process Seed Word Dictionaries

Processing the seed word list from: 

- *Nicolas, G., Bai, X., & Fiske, S. T. (2021). Comprehensive stereotype content dictionaries using a semi‐automated method. In European Journal of Social Psychology (Vol. 51, Issue 1, pp. 178–196). Wiley. https://doi.org/10.1002/ejsp.2724* 

which is included in the file *finalwordlist R 050418.csv* in the online supplement found under https://osf.io/yx45f/. 

In [8]:
# Read the seed word list and keep only the adjective entries.
# .copy() detaches the filtered rows from the full table so that columns can be
# added to `seed_words` later without pandas raising SettingWithCopyWarning.
seed_words = pd.read_csv("finalwordlist R 050418.csv")
seed_words = seed_words[seed_words["PoS"] == "ADJECTIVE"].copy()


# Add relevant WordNet information. 
def get_synset(row):
    """Return the WordNet synset name for the adjective sense described by ``row``.

    ``row["term"]`` is looked up among the adjective synsets and
    ``row["sense"] - 1`` is used as the index into that list.
    """
    adjective_synsets = wn.synsets(row["term"], pos=wn.ADJ)
    selected = adjective_synsets[row["sense"] - 1]
    return selected.name()

def get_definition(row):
    """Return the WordNet definition text for the adjective sense described by ``row``.

    The sense is selected the same way as the synset name: ``row["sense"] - 1``
    indexes the adjective synsets found for ``row["term"]``.
    """
    candidates = wn.synsets(row["term"], pos=wn.ADJ)
    chosen = candidates[row["sense"] - 1]
    return chosen.definition()

def get_example(row):
    """Return the first WordNet example sentence for the row's adjective sense.

    Parameters
    ----------
    row : mapping
        Must provide ``"term"`` (the word) and ``"sense"`` (sense number;
        ``sense - 1`` is used as the index into the adjective synsets).

    Returns
    -------
    str or None
        The first example sentence, or ``None`` when the synset has no example
        or the sense index is out of range.

    Any other error (for instance a missing WordNet corpus) is raised instead
    of being silently turned into ``None``.
    """
    try:
        synset = wn.synsets(row["term"], pos=wn.ADJ)[row["sense"] - 1]
    except IndexError:
        # Sense number beyond the available synsets: treated as "no example".
        return None

    examples = synset.examples()
    return examples[0] if examples else None
      
def get_antonyms(row):
    """Return the synset name of the first antonym of the row's adjective sense.

    Parameters
    ----------
    row : mapping
        Must provide ``"term"`` (the word) and ``"sense"`` (sense number;
        ``sense - 1`` is used as the index into the adjective synsets).

    Returns
    -------
    str or None
        Name of the synset that the first antonym lemma belongs to, scanning
        the lemmas of the selected synset in order; ``None`` when no lemma has
        an antonym.

    Raises
    ------
    IndexError
        If the sense number is beyond the synsets available for the term.
    """
    synset = wn.synsets(row["term"], pos=wn.ADJ)[row["sense"] - 1]
    for lemma in synset.lemmas():
        for antonym in lemma.antonyms():
            # Only the first antonym is used, so stop at the first hit.
            return antonym.synset().name()
    return None
        
# Derive the WordNet columns, one helper per column, evaluated row by row.
seed_words["Synset"] = seed_words.apply(get_synset, axis=1)
seed_words["Definition"] = seed_words.apply(get_definition, axis=1)
seed_words["Examples"] = seed_words.apply(get_example, axis=1)
seed_words["Antonyms"] = seed_words.apply(get_antonyms, axis=1)

# Split the annotated table into one frame per dictionary.
sociability_seed = seed_words.loc[seed_words["Dictionary"].eq("Sociability")]
ability_seed = seed_words.loc[seed_words["Dictionary"].eq("Ability")]
status_seed = seed_words.loc[seed_words["Dictionary"].eq("Status")]
agency_seed = seed_words.loc[seed_words["Dictionary"].eq("Agency")]
morality_seed = seed_words.loc[seed_words["Dictionary"].eq("Morality")]
politics_seed = seed_words.loc[seed_words["Dictionary"].eq("Politics")]

### 2. Match Antonyms

In [9]:
def clean_and_match_antonyms(df):
    """Pair each "high" seed word with the "low" seed word that is its antonym.

    Parameters
    ----------
    df : pandas.DataFrame
        Seed words of one dictionary with the columns ``term``, ``Synset``,
        ``Definition``, ``Examples``, ``Antonyms`` and ``Dir``.

    Returns
    -------
    pandas.DataFrame
        One row per matched pair with the columns ``Term1``, ``Synset1``,
        ``Definition1``, ``Example1``, ``Synset2``, ``Term2``, ``Definition2``
        and ``Example2``. Rows with any missing value and duplicate rows are
        removed, and the index runs from 0 to n-1 without gaps.
    """
    # Only words with a known antonym take part, on both the high and low side.
    has_antonym = df["Antonyms"].notnull()
    high_rows = df[(df["Dir"] == "high") & has_antonym]
    low_rows = df[(df["Dir"] == "low") & has_antonym]

    # The antonym synset of a high word becomes the join key "Synset2".
    dict_high = high_rows[["term", "Synset", "Definition", "Examples", "Antonyms"]].rename(
        columns={"term": "Term1", "Synset": "Synset1", "Definition": "Definition1",
                 "Examples": "Example1", "Antonyms": "Synset2"}
    )
    dict_low = low_rows[["term", "Synset", "Definition", "Examples"]].rename(
        columns={"term": "Term2", "Synset": "Synset2", "Definition": "Definition2",
                 "Examples": "Example2"}
    )

    # Inner join keeps only high words whose antonym synset is a low seed word.
    # De-duplicate before resetting the index so the result is numbered
    # contiguously even when duplicates were removed.
    dict_merged = (
        pd.merge(dict_high, dict_low, on=["Synset2"])
        .dropna()
        .drop_duplicates()
        .reset_index(drop=True)
    )

    return dict_merged

In [23]:
# Clean and match antonym dictionaries.

sociability_seed_matched = clean_and_match_antonyms(sociability_seed)
ability_seed_matched = clean_and_match_antonyms(ability_seed)
status_seed_matched = clean_and_match_antonyms(status_seed)
agency_seed_matched = clean_and_match_antonyms(agency_seed)
morality_seed_matched = clean_and_match_antonyms(morality_seed)
politics_seed_matched = clean_and_match_antonyms(politics_seed)

# Earlier name (with a doubled "seed") kept as an alias because a later cell
# still refers to it.
agency_seed_seed_matched = agency_seed_matched

In [24]:
# Number of matched antonym pairs in the Sociability dictionary.
len(sociability_seed_matched.index)

11

In [25]:
# Number of matched antonym pairs in the Ability dictionary.
len(ability_seed_matched.index)

9

In [26]:
# Number of matched antonym pairs in the Status dictionary.
len(status_seed_matched.index)

1

In [27]:
# Number of matched antonym pairs in the Agency dictionary.
len(agency_seed_seed_matched.index)

6

In [28]:
# Number of matched antonym pairs in the Morality dictionary.
len(morality_seed_matched.index)

13

In [29]:
# Number of matched antonym pairs in the Politics dictionary.
len(politics_seed_matched.index)

2

### 3. Create Files compatible with SensePOLAR

To be compatible with the SensePOLAR framework, the matched seed word lists are converted into several text (`.txt`) and pickle (`.pkl`) files.

In [5]:
def create_antonym_text_files(df, output_dir="antonyms"):
    """Write matched antonym pairs to the text and pickle files used with SensePOLAR.

    Parameters
    ----------
    df : pandas.DataFrame
        Matched antonym pairs with the columns ``Synset1``, ``Synset2``,
        ``Term1``, ``Term2``, ``Example1``, ``Example2``, ``Definition1`` and
        ``Definition2`` (one pair per row).
    output_dir : str or path-like, default "antonyms"
        Directory that receives the files. It is created when missing.
        Existing files with the same names are overwritten.

    Returns
    -------
    None

    Files written (all inside ``output_dir``)
    -----------------------------------------
    - ``antonym_wordnet_example_sentences_readable_extended.txt``: JSON object
      mapping the stringified synset pair to ``{term: [example]}``.
    - ``lookup_anto_example_dict.txt`` / ``.pkl``: list of
      ``[[term1, [example1]], [term2, [example2]]]``.
    - ``lookup_synset_definition.txt`` / ``.pkl``: list of
      ``[definition1, definition2]``.
    - ``lookup_synset_dict.txt`` / ``.pkl``: list of stringified synset pairs.
    """
    out_dir = Path(output_dir)
    # Without this, every open() below fails when the directory is absent.
    out_dir.mkdir(parents=True, exist_ok=True)

    example_sentences_readable = {}
    lookup_anto_example = []
    lookup_synset_definition = []
    lookup_synset = []

    for record in tqdm(df.to_dict("records")):
        # The pair is stored as the string form of a two-element list,
        # e.g. "['able.a.01', 'unable.a.01']".
        antonym_pair = str([str(record["Synset1"]), str(record["Synset2"])])

        term1, term2 = record["Term1"], record["Term2"]
        example1, example2 = record["Example1"], record["Example2"]

        example_sentences_readable[antonym_pair] = {term1: [example1], term2: [example2]}
        lookup_anto_example.append([[term1, [example1]], [term2, [example2]]])
        lookup_synset_definition.append([record["Definition1"], record["Definition2"]])
        lookup_synset.append(antonym_pair)

    # Human-readable JSON text files.
    text_payloads = {
        "antonym_wordnet_example_sentences_readable_extended.txt": example_sentences_readable,
        "lookup_anto_example_dict.txt": lookup_anto_example,
        "lookup_synset_definition.txt": lookup_synset_definition,
        "lookup_synset_dict.txt": lookup_synset,
    }
    for filename, payload in text_payloads.items():
        with open(out_dir / filename, "w", encoding="utf-8") as file:
            file.write(json.dumps(payload, indent=4))

    # Pickled copies of the three lookup lists.
    pickle_payloads = {
        "lookup_anto_example_dict.pkl": lookup_anto_example,
        "lookup_synset_definition.pkl": lookup_synset_definition,
        "lookup_synset_dict.pkl": lookup_synset,
    }
    for filename, payload in pickle_payloads.items():
        with open(out_dir / filename, "wb") as file:
            pickle.dump(payload, file)

In [6]:
# Export the Ability pairs; the output files are opened in write mode, so any
# previous export with the same file names is overwritten.
create_antonym_text_files(df=ability_seed_matched)

  0%|          | 0/9 [00:00<?, ?it/s]