In [1]:
# Load the word-vector dictionary

import sys
sys.path.insert(0, '../')
from wordvecs import WordVecsDict

# Pickled word-vector file to evaluate. Either word2vec or GloVe vectors can
# be used; to switch to GloVe, point this at
# '../dicts/glove-crawl840B-vecs300-norm.pickle' instead.
dictFileName = '../dicts/word2vec-GoogleNews-vecs300-norm.pickle'

# Build the (initially empty) word-vector dictionary and fill it from the file.
dic = WordVecsDict()
dic.loadDict(dictFileName)

In [2]:
# Load the mean ratings for all comparisons

import cPickle
# Pickle file holding the mean human ratings; the path is relative to the
# notebook's working directory.
ratingsFileName = 'relsim_mean_ratings.pickle'
# Binary mode is required for pickle data. The `with` block closes the file
# even if unpickling fails.
# NOTE(review): unpickling can execute arbitrary code, so only load rating
# files from a trusted source.
with open(ratingsFileName, 'rb') as ratingsFile:
    # The evaluation code later in this notebook iterates this object as a
    # dict whose keys are 4-tuples (rel1, rel2, pair1, pair2) and whose values
    # unpack to (mean rating, count).
    comp_ratings = cPickle.load(ratingsFile)

In [3]:
# Evaluate the model (word2vec or GloVe) on the relational similarity ratings
# for each relational type

import numpy as np
from numpy.linalg import norm
from scipy.stats import pearsonr

num_rels = 10

# Get the ratings and predictions for each relational type

ratings_each_rel = [[] for i in xrange(num_rels)]
cos_each_rel = [[] for i in xrange(num_rels)]
euc_each_rel = [[] for i in xrange(num_rels)]

for comp, values in comp_ratings.iteritems():
    rel1 = comp[0]
    rel2 = comp[1]
    pair1 = comp[2]
    pair2 = comp[3]
    
    if dic.hasWords(*pair1) and dic.hasWords(*pair2):
        reltype1 = int(rel1[:-1])
        reltype2 = int(rel2[:-1])
        if reltype1 == reltype2:   # within-type comparisons
            rel = reltype1 - 1
            mean_rating, count = values

            cos_sim = dic.relationalSim(pair1, pair2, method='cosine')
            euc_sim = dic.relationalSim(pair1, pair2, method='euclidean')
            
            ratings_each_rel[rel].append(mean_rating)
            cos_each_rel[rel].append(cos_sim)
            euc_each_rel[rel].append(euc_sim)
            
            
# Display Pearson's r between human ratings and cosine/Euclidean similarity
for reltype in xrange(num_rels):
    rated_sims = ratings_each_rel[reltype]
    cos_sims = cos_each_rel[reltype]
    euc_sims = euc_each_rel[reltype]
    cos_r = pearsonr(rated_sims, cos_sims)[0]
    euc_r = pearsonr(rated_sims, euc_sims)[0]
    
    print 'Relation', reltype + 1
    print 'cosine r =', cos_r
    print 'Euclidean r =', euc_r
    print

Relation 1
cosine r = 0.199051160842
Euclidean r = 0.292203255887

Relation 2
cosine r = 0.324399970986
Euclidean r = 0.227347147063

Relation 3
cosine r = -0.05327534789
Euclidean r = 0.0343375900588

Relation 4
cosine r = 0.00815956991989
Euclidean r = 0.148547523447

Relation 5
cosine r = 0.271585365358
Euclidean r = 0.245001890691

Relation 6
cosine r = -0.0108095331659
Euclidean r = 0.159617985456

Relation 7
cosine r = 0.504112348157
Euclidean r = 0.476912075186

Relation 8
cosine r = 0.0848324348175
Euclidean r = 0.0947301134938

Relation 9
cosine r = 0.300300821497
Euclidean r = 0.480926891516

Relation 10
cosine r = 0.0619665066231
Euclidean r = 0.222845272332

