In [1]:
import os
import time
import bz2
import codecs
import pickle as pkl

import numpy as np
from scipy import stats

from models import *

In [2]:
dep_based = load_model("deps.words")
bow2_based = load_model("bow2.words")
bow5_based = load_model("bow5.words")

In [3]:
all_models = [['Dependency', dep_based], 
              ['BoW with k = 2', bow2_based], 
              ['BoW with k = 5', bow5_based]]

In [4]:
# Read Simlex999.txt 
def read_sim_file(filename):
    pairs_list = []
    pairs_dict = {}
    with open(filename, "r") as f:
        for _, line in enumerate(f):
            line = line.strip().split()
            pairs_list.append((line[0], line[1], line[3]))
            pairs = pairs_list[1:]
    for word1, word2, score in pairs:
        if word1 not in pairs_dict.keys():
            pairs_dict[word1] = []
        pairs_dict[word1].append((word2,float(score)))
# Sorting by scores
    for word1, list_value in pairs_dict.items():
        list_value.sort(key=lambda _: -_[1])
    return pairs_dict


In [5]:
#Read MEN-Dataset-Natural-Full-Form
def read_men_file(filename):
    pairs_list = []
    pairs_dict = {}
    with open(filename, "r") as f:
        for _, line in enumerate(f):
            line = line.strip().split()
            pairs_list.append((line[0], line[1], line[2]))
    for word1, word2, score in pairs_list:
        if word1 not in pairs_dict.keys():
            pairs_dict[word1] = []
        pairs_dict[word1].append((word2, float(score)))
    
    for key, value in pairs_dict.items():
        value.sort(key=lambda _: -_[1])
        
    return pairs_dict

In [6]:
simlex_999 = read_sim_file("data/SimLex-999/SimLex-999.txt")

In [7]:
simlex_999["happy"]

[('cheerful', 9.55),
 ('glad', 9.17),
 ('young', 2.0),
 ('angry', 1.28),
 ('mad', 0.95)]

In [8]:
men_natural = read_men_file("data/MEN/MEN_dataset_natural_form_full")

In [9]:
men_natural["happy"]

[('smile', 40.0),
 ('love', 39.0),
 ('kids', 29.0),
 ('sexy', 28.0),
 ('tears', 27.0),
 ('kiss', 26.0),
 ('mom', 26.0),
 ('time', 24.0),
 ('home', 23.0),
 ('morning', 22.0),
 ('idea', 22.0),
 ('sleep', 22.0),
 ('stand', 17.0),
 ('night', 17.0),
 ('toes', 16.0),
 ('posted', 14.0),
 ('walk', 14.0),
 ('sitting', 11.0),
 ('licking', 11.0),
 ('stop', 5.0),
 ('rusty', 3.0)]

In [10]:
eval_models = [["SimLex-999", simlex_999], 
               ["MEN (Natural) Full Form", men_natural]]

In [11]:
# Evaluation
# For each common word from Simlex vs. models
#     Find length of Simlex data
#         Normalize scores of Simlex data in the range [0,1]
#     Read similar length data in models
#     Store scores in (a, b) - for spearman (two 1D arrays of the scores)

for item in eval_models:
    start_time = time.time()
    for comp_item in all_models:
        a = np.zeros((len(item[1]),1))
        b = np.zeros((len(item[1]),1))
        i = 0
        for key, value in item[1].items():
            for model_word in comp_item[1].word_index.keys():
                if (key == model_word):
                    eval_length = len(value)
                    model_all_values = comp_item[1].most_similar(key, score = True, n = eval_length)
                    normalized_sum = 0.0

                    for _, score in value:
                        normalized_sum += score
                    for _, score in value:
                        score = round(score/normalized_sum, 3)
                        a[i] = score
                    for _, model_score in model_all_values:
                        b[i] = model_score
            i=i+1
            rho, _ = stats.spearmanr(a, b, axis = None)
            val, _ = stats.pearsonr(a, b)
        print(item[0], ' vs. ', comp_item[0], ' ~ Spearman Correlation: ', rho)
        print(item[0], ' vs. ', comp_item[0], ' ~ Pearson Correlation: ', val)
    print("Comparing against ", item[0], " for all models took ", time.time() - start_time, " seconds.\n")

SimLex-999  vs.  Dependency  ~ Spearman Correlation:  0.35255616374878085
SimLex-999  vs.  Dependency  ~ Pearson Correlation:  [0.34894498]
SimLex-999  vs.  BoW with k = 2  ~ Spearman Correlation:  0.4187383017559465
SimLex-999  vs.  BoW with k = 2  ~ Pearson Correlation:  [0.42075226]
SimLex-999  vs.  BoW with k = 5  ~ Spearman Correlation:  0.37648799239764275
SimLex-999  vs.  BoW with k = 5  ~ Pearson Correlation:  [0.37054853]
Comparing against  SimLex-999  for all models took  3852.2572481632233  seconds.

MEN (Natural) Full Form  vs.  Dependency  ~ Spearman Correlation:  0.26840465221879417
MEN (Natural) Full Form  vs.  Dependency  ~ Pearson Correlation:  [0.31301145]
MEN (Natural) Full Form  vs.  BoW with k = 2  ~ Spearman Correlation:  0.35724301961763083
MEN (Natural) Full Form  vs.  BoW with k = 2  ~ Pearson Correlation:  [0.42310006]
MEN (Natural) Full Form  vs.  BoW with k = 5  ~ Spearman Correlation:  0.30218813419558455
MEN (Natural) Full Form  vs.  BoW with k = 5  ~ Pear