In [1]:
from data_loading import Embeddings, load_sim_dataset, load_analogy_dataset
import numpy as np
from scipy.stats import pearsonr, spearmanr

# Evaluating Word Representations

## Word similarity

We will examine the use of word embeddings as representations of word meaning. In particular, we will use the pretrained word embeddings from [1]: dependency-based embeddings and bag-of-words embeddings with context window sizes k = 2 and k = 5.

In [2]:
# Pretrained embeddings: two bag-of-words variants and the dependency-based one
bow2embeddings = Embeddings("data/bow2.words")
bow5embeddings = Embeddings("data/bow5.words")
depembeddings = Embeddings("data/deps.words")

# Display name -> embeddings; the evaluation loops below iterate over this
embedding_dict = dict(
    BOW2=bow2embeddings,
    BOW5=bow5embeddings,
    Dependency=depembeddings,
)

# Gold-standard word-pair similarity datasets (SimLex-999 and MEN)
simlex_pairs, simlex_scores = load_sim_dataset(
    "data/SimLex-999.txt", score_col=3, skip=1
)
men_pairs, men_scores = load_sim_dataset(
    "data/MEN_dataset_natural_form_full", score_col=2
)

### Qualitative results
We can evaluate the word embeddings qualitatively by listing the words most similar to a given word under each set of word embeddings.

In [78]:
# Probe words whose most similar words are printed for every embedding set
test_words = ['batman', 'hogwarts', 'turing', 'florida', 'object-oriented', 'dancing']
for emb, vectors in embedding_dict.items():
    print(emb)
    for word in test_words:
        neighbours = vectors.top_similar(word)
        print('\t', word, ':', neighbours)

BOW2
	 batman : ['superman', 'superboy', 'aquaman', 'catwoman', 'batgirl']
	 hogwarts : ['evernight', 'sunnydale', 'garderobe', 'blandings', 'collinwood']
	 turing : ['non-deterministic', 'finite-state', 'nondeterministic', 'bchi', 'primality']
	 florida : ['fla', 'alabama', 'gainesville', 'tallahassee', 'texas']
	 object-oriented : ['aspect-oriented', 'event-driven', 'objective-c', 'dataflow', '4gl']
	 dancing : ['singing', 'dance', 'dances', 'breakdancing', 'clowning']
BOW5
	 batman : ['superman', 'catwoman', 'nightwing', 'spider-man', 'superboy']
	 hogwarts : ['dumbledore', 'dementors', 'hagrid', 'snape', 'voldemort']
	 turing : ['non-deterministic', 'nondeterministic', 'deterministic', 'finite-state', 'reducibility']
	 florida : ['jacksonville', 'tallahassee', 'miami', 'gainesville', 'sarasota']
	 object-oriented : ['aspect-oriented', 'event-driven', 'smalltalk', 'prolog', 'domain-specific']
	 dancing : ['singing', 'dance', 'dances', 'dancers', 'danced']
Dependency
	 batman : ['sup

### Quantitative results

In [4]:
def score_pairs(pairs, scores, embs, label):
    """Print how well embedding similarities track gold similarity scores.

    Only pairs whose two words are both present in ``embs.word2idx`` are used.
    For those pairs, the gold score (``scores`` at the pair's position) is
    compared with ``embs.similarity(word1, word2)`` via Pearson and Spearman
    correlation, and a single formatted row tagged with ``label`` is printed.

    Args:
        pairs: iterable of (word1, word2) tuples.
        scores: gold scores, indexable by the position of each pair.
        embs: object exposing ``word2idx`` and ``similarity(word1, word2)``.
        label: text shown in the first column of the printed row.
    """
    reference = []
    predicted = []
    for position, pair in enumerate(pairs):
        first, second = pair
        # Ignore pairs containing a word that has no embedding
        if first not in embs.word2idx or second not in embs.word2idx:
            continue
        reference.append(scores[position])
        predicted.append(embs.similarity(first, second))

    # Correlate gold scores with embedding similarities
    pearson_coeff = pearsonr(reference, predicted)[0]
    spearman_coeff = spearmanr(reference, predicted).correlation

    row = '{:>10} {:^8.4f}  {:^8.4f}'
    print(row.format(label, pearson_coeff, spearman_coeff))

# Column headings, aligned with the rows printed by score_pairs
header = '{:10} {:^8s}  {:^8s}'.format('', 'Pearson', 'Spearman')
print(header)

# (row label, word pairs, gold scores) for each similarity dataset, in print order
benchmarks = (
    ('SimLEX', simlex_pairs, simlex_scores),
    ('MEN', men_pairs, men_scores),
)
for emb_name, embs in embedding_dict.items():
    print('{:s}'.format(emb_name))
    for label, pairs, scores in benchmarks:
        score_pairs(pairs, scores, embs, label)

           Pearson   Spearman
BOW2
    SimLEX  0.4285    0.4141 
       MEN  0.6777    0.6999 
BOW5
    SimLEX  0.3756    0.3674 
       MEN  0.7082    0.7232 
Dependency
    SimLEX  0.4619    0.4456 
       MEN  0.5974    0.6178 


## Word Analogy

We now examine the usage of word embeddings to solve questions of the form "**a** is to **b** as **c** is to **?**".

### Qualitative results

In [3]:
# Ask the BOW5 embeddings: "man is to king as woman is to ?"
answers = bow5embeddings.analogy('man', 'king', 'woman')
# Show only the first five returned candidates (presumably ranked best-first -- confirm in data_loading)
print(answers[:5])

['queen', 'princess', 'berengaria', 'monarch', 'king-emperor']


### Quantitative results

In [4]:
# Analogy questions; get_analogy_stats unpacks each entry as (a, b, c, d)
analogies = load_analogy_dataset('data/questions-words.txt')

def get_analogy_stats(embs, analogies):
    """Print top-1 accuracy and mean reciprocal rank (MRR) on analogy questions.

    Each analogy is a tuple (a, b, c, d), read as "a is to b as c is to d".
    Questions where a, b or c is missing from ``embs.word2idx`` are skipped
    and count towards neither metric.

    A progress line is printed each time another 1000 questions have been
    evaluated, followed by a final summary line. Both metrics are reported as
    percentages truncated to integers. If no question could be evaluated,
    both metrics are reported as 0.

    Args:
        embs: object exposing ``word2idx`` and ``analogy(a, b, c)``; the
            latter must return a list-like of candidate words (assumed to be
            ordered best-first -- TODO confirm against data_loading).
        analogies: sized sequence of (a, b, c, d) tuples.
    """
    count = 0
    correct = 0
    acc_rank = 0
    for a, b, c, d in analogies:
        # Skip questions that cannot be posed with these embeddings
        if not all(word in embs.word2idx for word in (a, b, c)):
            continue

        count += 1
        results = embs.analogy(a, b, c)

        # Accuracy: the first candidate must be the expected word
        if results[0] == d:
            correct += 1

        # MRR: add the reciprocal of the expected word's 1-based position
        try:
            acc_rank += 1 / (results.index(d) + 1)
        except ValueError:
            # Expected word not among the candidates: contributes 0
            pass

        # Report progress only right after an evaluated question, so count is
        # non-zero here and each milestone is printed exactly once
        if count % 1000 == 0:
            accuracy = int(100 * correct / count)
            mrr = int(100 * acc_rank / count)
            print('{:d}/{:d} acc: {:d} mrr: {:d}'.format(count, len(analogies), accuracy, mrr))

    # Avoid dividing by zero when nothing could be evaluated
    accuracy = int(100 * correct / count) if count else 0
    mrr = int(100 * acc_rank / count) if count else 0
    print('acc: {:d} mrr: {:d}'.format(accuracy, mrr))

# Run the analogy benchmark once per embedding set, printing its name first
for emb_name, embs in embedding_dict.items():
    print('{:s}'.format(emb_name))
    get_analogy_stats(embs, analogies)

BOW2
1000/19544 acc: 71 mrr: 78
2000/19544 acc: 68 mrr: 76
3000/19544 acc: 67 mrr: 75
4000/19544 acc: 67 mrr: 75
5000/19544 acc: 65 mrr: 73
6000/19544 acc: 57 mrr: 65
7000/19544 acc: 54 mrr: 62
8000/19544 acc: 52 mrr: 60
9000/19544 acc: 52 mrr: 61
10000/19544 acc: 49 mrr: 58
11000/19544 acc: 50 mrr: 58
12000/19544 acc: 53 mrr: 61
13000/19544 acc: 54 mrr: 62
14000/19544 acc: 54 mrr: 62
15000/19544 acc: 55 mrr: 64
16000/19544 acc: 56 mrr: 64
17000/19544 acc: 56 mrr: 64
18000/19544 acc: 57 mrr: 65
19000/19544 acc: 58 mrr: 66
acc: 58 mrr: 66
BOW5
1000/19544 acc: 82 mrr: 87
2000/19544 acc: 77 mrr: 84
3000/19544 acc: 75 mrr: 83
4000/19544 acc: 73 mrr: 82
5000/19544 acc: 72 mrr: 81
6000/19544 acc: 64 mrr: 73
7000/19544 acc: 62 mrr: 70
8000/19544 acc: 60 mrr: 69
9000/19544 acc: 60 mrr: 69
10000/19544 acc: 56 mrr: 65
11000/19544 acc: 56 mrr: 65
12000/19544 acc: 58 mrr: 67
13000/19544 acc: 58 mrr: 67
14000/19544 acc: 59 mrr: 68
15000/19544 acc: 60 mrr: 69
16000/19544 acc: 61 mrr: 69
17000/19544 

### References
[1] Levy, O., & Goldberg, Y. (2014). Dependency-based word embeddings. In Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers) (Vol. 2, pp. 302-308).