# Manual Evaluation of POLAR Dimensions

## 1 Import Data

### 1.1 Import Packages

In [1]:
import gensim
from numpy import linalg
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm_notebook as tqdm
import time
from random import shuffle
import sys
import nltk 
from nltk.corpus import wordnet 
import gc
from collections import defaultdict
import random
import json
import os
import pandas as pd
import pickle

import plotly
import numpy as np
import plotly.graph_objs as go
from sklearn.decomposition import PCA

from sklearn.cluster import DBSCAN

from functools import partial
import ipywidgets as widgets
from IPython.display import clear_output
from ipywidgets import IntProgress
from random import randint

from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import multilabel_confusion_matrix

import torch

from gensim.scripts.glove2word2vec import glove2word2vec
from gensim.test.utils import datapath, get_tmpfile

from gensim.test.utils import datapath

### 1.2 Import Embedding

Import the embedding file that you want to use for testing.

In [2]:
#import the embedding for analysis; the analysis has to be repeated for every combination
#of pre-trained model and antonym set used (8 in total)
company_df = pd.read_csv('../data/processed/POLAR-GloVeWiki-bus-antonyms-inter.csv')
#Also set the embedding name and antonym set here so that the result files get the right names in the end,
#bus=business antonym set we created, org=original antonyms used by POLAR paper
embedding_name = 'GloVeWiki_bus'

In [3]:
#choose 10 companies that are in all embeddings for analysis
#exchange for common-10-companies-google file when using googlenews embeddings
#NOTE: pickle.load can execute arbitrary code from the file, only load files you created yourself
with open("../data/processed/common-10-companies", "rb") as fp:  
    b = pickle.load(fp)
    
#keep only the rows whose 'Unnamed: 0' label is contained in the unpickled collection
analysis_df = company_df.loc[company_df['Unnamed: 0'].isin(b)]
#the labels of the selected rows
df_names = analysis_df['Unnamed: 0']

## 2 POLAR Embedding Test

### 2.1 Create Test Environment

The functions for creating the test setup are defined below.

In [4]:
#create function that handles the POLAR pair search
def get_important_pairs(df, number, name):
    """Return the strongest and weakest dimensions for one labelled row.

    The row of ``df`` whose ``'Unnamed: 0'`` entry equals ``name`` is ranked
    by absolute value across all remaining columns.

    Args:
        df: DataFrame with an ``'Unnamed: 0'`` label column; every other
            column is treated as a dimension.
        number: How many dimensions to report at each end of the ranking.
        name: Label of the row to analyse.

    Returns:
        DataFrame with the columns ``top_polar_dim`` / ``top_value``
        (largest absolute values, in descending order) and
        ``down_polar_dim`` / ``down_value`` (smallest absolute values,
        listed from larger to smaller). Each value cell holds the
        ``.values`` array of the matching column, not a bare scalar.

    Raises:
        IndexError: If no row of ``df`` is labelled ``name``.
    """
    is_target = df['Unnamed: 0'] == name
    scores = df.loc[is_target]
    scores = scores.loc[:, scores.columns != 'Unnamed: 0']
    dims = scores.columns.values.tolist()

    # Column positions ordered by ascending absolute value; the ranking of
    # the first matching row is the one that is used.
    ranking = scores.abs().values.argsort(1)
    strongest = ranking[:, -number:][:, ::-1][0]
    weakest = ranking[:, :number][:, ::-1][0]

    top_names = [dims[pos] for pos in strongest]
    down_names = [dims[pos] for pos in weakest]

    result = pd.DataFrame(top_names)
    result.columns = ['top_polar_dim']
    result['top_value'] = [scores[dim].values for dim in top_names]
    result['down_polar_dim'] = down_names
    result['down_value'] = [scores[dim].values for dim in down_names]
    return result

In [5]:
#create a dataset for the test
def create_polar_intruder_dataset(number,bus_df,name):
    """Build one intrusion task: the top dimensions with one intruder mixed in.

    Args:
        number: Number of options in the task.
        bus_df: DataFrame handed on to ``get_important_pairs``.
        name: Label of the row the task is built for.

    Returns:
        The DataFrame from ``get_important_pairs`` extended by three columns:
        ``random`` (the options to show; the randomly chosen slot holds the
        ``down_polar_dim`` entry, all others the ``top_polar_dim`` entry),
        ``name`` (``name`` repeated on every row) and ``position`` (the
        intruder slot repeated on every row).
    """
    pairs = get_important_pairs(bus_df, number, name)
    intruder_slot = random.randint(0, number - 1)

    options = [
        pairs['down_polar_dim'].loc[slot] if slot == intruder_slot
        else pairs['top_polar_dim'].loc[slot]
        for slot in range(number)
    ]

    pairs['random'] = options
    pairs['name'] = [name] * number
    pairs['position'] = [intruder_slot] * number
    return pairs

In [6]:
#handle intruder creation outside
def create_polar_test_intruder_dataset(df):
    """Create the intrusion tasks for every name in ``df`` and stack them.

    Each name gets a 5-option task built from the module-level
    ``company_df``.

    Args:
        df: Iterable of row labels to build tasks for.

    Returns:
        One DataFrame with all tasks concatenated and a fresh 0..n-1 index.
    """
    tasks = [create_polar_intruder_dataset(5, company_df, company) for company in df]
    return pd.concat(tasks, ignore_index=True)

In [7]:
#create function for the test execution
def polar_intrusion_test(df_intruder,name):
    """Run the interactive intrusion test with five buttons per task.

    The rows of ``df_intruder`` are consumed in groups of five: the ``name``
    of the group's first row is shown, followed by five buttons labelled with
    the ``random`` values of the group's rows. Every click records the index
    of the clicked button and moves on to the next group. After the last
    group the clicks are stored in a ``chosen_position`` column and the
    DataFrame is written to a CSV file.

    Args:
        df_intruder: DataFrame with ``random`` and ``name`` columns that is
            indexed 0..n-1 (rows are looked up with ``.loc`` by integer label).
        name: First name of the annotator; used in the closing message and in
            the output file name.

    Returns:
        The same ``df_intruder`` object. The function returns right after the
        first task is displayed; ``chosen_position`` is added in place later,
        by the click handler, once all tasks are answered.

    Side effects:
        Rebinds the module-level globals ``i`` and ``chosen_positions``, reads
        the module-level ``embedding_name`` and writes the results to a
        hard-coded absolute path.
    """
    # NOTE(review): `display` is not imported in this file; presumably the
    # notebook's built-in IPython display function - confirm when running
    # outside Jupyter.

    max_count = df_intruder.shape[0]
    # i is the row label of the first option of the task currently shown
    global i
    i = 0
    
    button_0 = widgets.Button(description = df_intruder['random'].loc[i])
    button_1 = widgets.Button(description = df_intruder['random'].loc[i+1])
    button_2 = widgets.Button(description = df_intruder['random'].loc[i+2])
    button_3 = widgets.Button(description = df_intruder['random'].loc[i+3])
    button_4 = widgets.Button(description = df_intruder['random'].loc[i+4])
    
    # one entry per row of df_intruder, filled five at a time by the handler
    global chosen_positions
    chosen_positions=[]
    
    display("Polar Intrusion Text")
    
    # progress bar counted in rows (advances by 5 per answered task)
    f = IntProgress(min=0, max=max_count)    
    display(f)
    
    display(df_intruder['name'].loc[i])

    display(button_0)
    display(button_1)
    display(button_2)
    display(button_3)
    display(button_4)
    
    def btn_eventhandler(position, obj):
        # position: index (0-4) of the clicked button, bound via partial
        # obj: the clicked widget passed in by on_click (unused)
        global i 
        i += 5
        
        # redraw the whole cell output for the next task
        clear_output(wait=True)
            
        display("Polar Intrusion Text")
        display(f)
        f.value += 5
        
        # repeat the answer for all five rows of the task so that the list
        # lines up with the rows of df_intruder
        global chosen_positions
        chosen_positions+=  5*[position]
                
        if i < max_count:
            
            display(df_intruder['name'].loc[i])

            button_0 = widgets.Button(description = df_intruder['random'].loc[i])
            button_1 = widgets.Button(description = df_intruder['random'].loc[i+1])
            button_2 = widgets.Button(description = df_intruder['random'].loc[i+2])
            button_3 = widgets.Button(description = df_intruder['random'].loc[i+3])
            button_4 = widgets.Button(description = df_intruder['random'].loc[i+4])
            
            display(button_0)
            display(button_1)
            display(button_2)
            display(button_3)
            display(button_4)
            
            # the new buttons need their own handlers
            button_0.on_click(partial(btn_eventhandler,0))
            button_1.on_click(partial(btn_eventhandler,1))
            button_2.on_click(partial(btn_eventhandler,2))
            button_3.on_click(partial(btn_eventhandler,3))
            button_4.on_click(partial(btn_eventhandler,4))
            
        else:
            print ("Thanks " + name + " you finished all the work!")
            #df_intruder['chosen_word'] = chosen_words
            df_intruder['chosen_position'] = chosen_positions
            # NOTE(review): absolute, user-specific path - adjust before
            # running on another machine
            df_intruder.to_csv("/Users/stjepankusenic/POLAR_WEBE/data/external/polar_intrusion_test_" + name +'_'+ embedding_name + "_results" + ".csv", index = False)
                
    # handlers for the buttons of the first task
    button_0.on_click(partial(btn_eventhandler,0))
    button_1.on_click(partial(btn_eventhandler,1))
    button_2.on_click(partial(btn_eventhandler,2))
    button_3.on_click(partial(btn_eventhandler,3))
    button_4.on_click(partial(btn_eventhandler,4))

    
    return df_intruder

### 2.2 Test Execution

Execute the test:

In [8]:
#df_test =create_polar_test_intruder_dataset(df_names)
#change the name to your first name!
#df_test1 =polar_intrusion_test(df_test,'Stjepan')

### 2.3 Evaluate the Test

Here we look at how the annotators performed in the test.

In [9]:
#import the test files you want to analyze (one result file per coder)
#data1 and data2 are the inputs for the evaluation function defined in the next cell
data1 = pd.read_csv('../data/external/Coder-Evaluation-POLAR-dim/Sree-eval/polar_intrusion_test_Sreehari_GloVeWiki_org_results.csv')
data2 = pd.read_csv('../data/external/Coder-Evaluation-POLAR-dim/Xho_eval/polar_intrusion_test_Xhoana_GloVeWiki_org_results.csv')

In [10]:
#create a function that handles the evaluation
def test_evaluation(df1,df2,number,name1, name2):
    """Score two coders' intrusion-test results and print their agreement.

    For each coder the weighted F1, accuracy, weighted precision and weighted
    recall of the chosen intruder position against the true intruder position
    are printed and collected. Cohen's kappa between the two coders' choices
    is printed as well.

    Args:
        df1: Result table of coder 1 with ``position`` (true intruder slot)
            and ``chosen_position`` (slot the coder clicked) columns.
        df2: Result table of coder 2, same layout as ``df1``.
        number: Number of tasks to evaluate. The tables are expected to hold
            5 rows per task, so the rows labelled 0, 5, 10, ... are used.
        name1: Label stored as first element of coder 1's result list.
        name2: Label stored as first element of coder 2's result list.

    Returns:
        Two lists ``[name, f1, accuracy, precision, recall]``, one per coder.

    Raises:
        KeyError: If a table has no row labelled ``5 * k`` for some
            ``k < number``.
    """
    # One row per task is enough: all 5 rows of a task are expected to carry
    # the same 'position' and 'chosen_position'.
    task_rows = [task * 5 for task in range(number)]
    coder1 = df1.loc[task_rows]
    coder2 = df2.loc[task_rows]

    weighted = {'average': 'weighted'}
    weighted_zero_div = {'average': 'weighted', 'zero_division': 1}
    metrics = [
        ('F1 Score', f1_score, weighted),
        ('Accuracy Score', accuracy_score, {}),
        ('Precision Score', precision_score, weighted_zero_div),
        ('Recall Score', recall_score, weighted_zero_div),
    ]

    master_list1 = [name1]
    master_list2 = [name2]
    coders = ((1, coder1, master_list1), (2, coder2, master_list2))
    for label, metric, kwargs in metrics:
        for coder_no, coder, results in coders:
            # scikit-learn metrics take (y_true, y_pred): the true intruder
            # position comes first, the coder's answer second. Swapping them
            # changes weighted F1 and precision.
            score = metric(coder['position'], coder['chosen_position'], **kwargs)
            print('{} Coder {}:'.format(label, coder_no), score)
            results.append(score)
        print()

    # Inter-coder agreement compares what the two coders chose, not one
    # coder's choice with the other table's ground truth.
    # NOTE(review): only meaningful if both coders answered the same tasks in
    # the same order - confirm for the files being compared.
    kappa = cohen_kappa_score(coder1['chosen_position'], coder2['chosen_position'])
    print('Cohens Kappa for the Coders:', kappa)
    return master_list1, master_list2


In [11]:
#create a dataframe and save initial results
#list1, list2 = test_evaluation(data1,data2,10,'Stjepan_GloVe_Twitter_bus','Stjepan_GloVe_Twitter_org')
#dataframe = pd.DataFrame([list1],columns=["Name","F1_Score","Accuracy_Score","Precision_Score","Recall_Score"])
#dataframe.loc[len(dataframe)] = list2
#dataframe.to_csv("/Users/stjepankusenic/POLAR_WEBE/data/processed/eval_results_individual")

In [12]:
#save further results into the same dataframe
dataframe= pd.read_csv('../data/processed/eval_results_individual')
#drop the 'Unnamed: 0' column (presumably the index that was written when the file was saved)
dataframe=dataframe.drop(columns=['Unnamed: 0'])
#list1, list2 = test_evaluation(data1,data2,10,'Simran_Reddit_bus','Simran_Reddit_org')
#dataframe.loc[len(dataframe)] = list1
#dataframe.loc[len(dataframe)] = list2
#look at the results
display(dataframe)
#dataframe.to_csv('/Users/stjepankusenic/POLAR_WEBE/data/processed/eval_results_individual')

Unnamed: 0,Name,F1_Score,Accuracy_Score,Precision_Score,Recall_Score
0,Stjepan_GloVe_Twitter_bus,0.35,0.4,0.533333,0.4
1,Stjepan_GloVe_Twitter_org,0.2,0.2,0.316667,0.2
2,Stjepan_GloVe_Wiki_bus,0.433333,0.5,0.4,0.5
3,Stjepan_GloVe_Wiki_org,0.283333,0.3,0.62,0.3
4,Stjepan_GoogleNews_bus,0.166667,0.2,0.35,0.2
5,Stjepan_GoogleNews_org,0.166667,0.2,0.45,0.2
6,Stjepan_Reddit_bus,0.114286,0.1,0.133333,0.1
7,Stjepan_Reddit_org,0.0,0.0,0.0,0.0
8,Renee_GloVe_Twitter_bus,0.085714,0.1,0.175,0.1
9,Renee_GloVe_Twitter_org,0.146667,0.2,0.116667,0.2


In [13]:
#look at the results for the inter-annotator test
#(one row per coder combination with the kappa values for the bus and org antonym sets)
display(pd.read_excel(r'../data/processed/eval-results-intercoder-glovewiki.xlsx'))

Unnamed: 0,Coder-combination,Kappa-bus,Kappa-org
0,Stjepan-Renee,0.367089,-0.282051
1,Stjepan-Simran,0.090909,0.047619
2,Stjepan-Sreehari,0.135802,0.047619
3,Stjepan-Xhoana,0.230769,-0.139241
4,Renee-Simran,-0.139241,0.230769
5,Renee-Sreehari,0.135802,0.047619
6,Renee-Xhoana,0.0,-0.084337
7,Simran-Sreehari,-0.315789,0.102564
8,Simran-Xhoana,0.090909,0.146341
9,Sreehari-Xhoana,-0.139241,-0.125
