In [1]:
import os
import random
import pickle
from collections import defaultdict, Counter

import numpy as np

from sklearn.manifold import TSNE, Isomap
from sklearn.decomposition import PCA

import matplotlib.pyplot as plt
import scipy
%matplotlib inline

from models import *

In [2]:
# Default (width, height) for every matplotlib figure created in this notebook.
plt.rcParams["figure.figsize"] = (15, 20)

In [3]:
def read_analogy_data(path):
    """
    Read an analogy file into token lists and their task labels.

    Lines starting with ":" name a task; every other non-blank line holds the
    whitespace-separated words of one analogy belonging to the most recently
    named task.

    Parameters
    ----------
    path : str
        Path of the analogy file to read.

    Returns
    -------
    analogy_data : list of list of str
        One lower-cased token list per analogy line, in file order.
    task_labels : list
        Task name in effect for each entry of ``analogy_data`` (same length
        and order). ``None`` for analogy lines that appear before the first
        ":" header.
    """
    analogy_data = []
    task_labels = []
    # No header seen yet. Without this default, an analogy line preceding the
    # first ":" header would raise UnboundLocalError.
    task = None
    # Explicit encoding so the result does not depend on the platform locale.
    with open(path, encoding="utf-8") as reader:
        for line in reader:
            if line.startswith(":"):
                task = line.strip().strip(":").strip()
                continue
            # convert to lower-case
            tokens = line.lower().split()
            if not tokens:
                # Blank line: skip it instead of recording an empty analogy.
                continue
            analogy_data.append(tokens)
            task_labels.append(task)
    return analogy_data, task_labels
# Load the analogy questions and the task label of each one; later cells read both names.
analogy_data, task_labels = read_analogy_data("./data/questions-words.txt")

In [4]:
# Peek at the first ten analogies and the task label attached to each of them
analogy_data[:10], task_labels[:10]

([['athens', 'greece', 'baghdad', 'iraq'],
  ['athens', 'greece', 'bangkok', 'thailand'],
  ['athens', 'greece', 'beijing', 'china'],
  ['athens', 'greece', 'berlin', 'germany'],
  ['athens', 'greece', 'bern', 'switzerland'],
  ['athens', 'greece', 'cairo', 'egypt'],
  ['athens', 'greece', 'canberra', 'australia'],
  ['athens', 'greece', 'hanoi', 'vietnam'],
  ['athens', 'greece', 'havana', 'cuba'],
  ['athens', 'greece', 'helsinki', 'finland']],
 ['capital-common-countries',
  'capital-common-countries',
  'capital-common-countries',
  'capital-common-countries',
  'capital-common-countries',
  'capital-common-countries',
  'capital-common-countries',
  'capital-common-countries',
  'capital-common-countries',
  'capital-common-countries'])

In [5]:
# Number of analogy questions per task
Counter(task_labels)

Counter({'capital-common-countries': 506,
         'capital-world': 4524,
         'city-in-state': 2467,
         'currency': 866,
         'family': 506,
         'gram1-adjective-to-adverb': 992,
         'gram2-opposite': 812,
         'gram3-comparative': 1332,
         'gram4-superlative': 1122,
         'gram5-present-participle': 1056,
         'gram6-nationality-adjective': 1599,
         'gram7-past-tense': 1560,
         'gram8-plural': 1332,
         'gram9-plural-verbs': 870})

In [6]:
# Only the "bow2.words" model is loaded; the bow5/deps loads and the `models`
# lookup dict below are left commented out.
# NOTE(review): load_model is presumably provided by `from models import *` — confirm.
bow2_sim = load_model("bow2.words")
#bow5_sim = load_model("bow5.words")
#deps_sim = load_model("deps.words")

# models = {
#     "bow2": bow2_sim,
#     "bow5": bow5_sim,
#     "deps": deps_sim
# }

In [7]:
def reciprocal_rank(correct_value, results):
    """
    Return the reciprocal rank of ``correct_value`` within ``results``.

    Parameters
    ----------
    correct_value
        The item to look for.
    results : list or tuple
        Ranked candidates, best first. Only its ``index`` method is used.

    Returns
    -------
    float
        ``1 / rank`` where rank is the 1-based position of the first
        occurrence of ``correct_value``; ``0.0`` when it is absent.
    """
    try:
        position = results.index(correct_value)
    except ValueError:
        # index() reports "not found" with ValueError. Catch only that, so
        # real errors (wrong argument type, KeyboardInterrupt, ...) still surface.
        return 0.0
    return 1 / (position + 1)

# tests: the match sits at rank 3, 2 and 1, so the printed values should be 1/3, 1/2 and 1.0
print(reciprocal_rank("cats", ["catten", "cati", "cats"]))
print(reciprocal_rank("tori", ["catten", "tori", "cats"]))
print(reciprocal_rank("virus", ["virus", "cati", "cats"]))

0.3333333333333333
0.5
1.0


In [8]:
def compute_wv(model, a, a_star, b):
    """
    Build the target vector for the analogy ``a : a_star :: b : ?``.

    The offset ``model[a_star] - model[a]`` is added to ``model[b]``.

    Parameters
    ----------
    model
        Object supporting ``word in model.word_index`` and ``model[word]``.
    a, a_star, b
        The three query words.

    Returns
    -------
    The vector ``model[b] + (model[a_star] - model[a])``, or ``None`` when any
    of the three words is missing from ``model.word_index``.
    """
    vocabulary = model.word_index
    for word in (a, a_star, b):
        if word not in vocabulary:
            return None

    vec_a = model[a]
    vec_a_star = model[a_star]
    vec_b = model[b]

    offset = vec_a_star - vec_a
    return vec_b + offset

In [None]:
def cos_csim(matrix, vector):
    """
    Compute the cosine similarity between each row of matrix and vector.

    Parameters
    ----------
    matrix : array-like, 2-D
        One candidate vector per row.
    vector : array-like
        The query vector; it is flattened into a single row before comparison.

    Returns
    -------
    numpy.ndarray
        1-D array with one similarity per row of ``matrix`` (larger means
        more similar).
    """
    # Import the submodule explicitly: on older SciPy releases a bare
    # `import scipy` does not make `scipy.spatial` available by itself.
    from scipy.spatial.distance import cdist

    # asarray lets plain lists be passed as well as ndarrays.
    v = np.asarray(vector).reshape(1, -1)
    # cdist(..., 'cosine') yields the cosine *distance*; 1 - distance turns it
    # back into a similarity.
    return 1 - cdist(matrix, v, 'cosine').reshape(-1)

In [None]:
def evaluate_model(model, data, given_task_label, labels=None):
    """
    Evaluate a model on the analogies of one task and print accuracy and MRR.

    For every analogy ``a : a_star :: b : b_star`` whose label equals
    ``given_task_label``, the target vector from ``compute_wv`` is compared
    with every row of the embedding matrix using ``cos_csim``. The vocabulary
    is ranked by decreasing similarity with the three query words removed.
    An analogy counts as correct when the top-ranked word is ``b_star``;
    ``reciprocal_rank`` supplies its contribution to the MRR.

    Parameters
    ----------
    model
        Object exposing ``word_index`` (treated here as a word -> embedding
        row mapping), ``embeddings`` and ``model[word]`` lookup.
    data : sequence of 4-item sequences
        The analogies as ``(a, a_star, b, b_star)``.
    given_task_label
        Only analogies carrying this label are evaluated.
    labels : sequence, optional
        Label of each row of ``data``, in the same order. When omitted, the
        notebook-level ``task_labels`` is used, as before.

    Returns
    -------
    None
        Progress and the final metrics are printed.
    """
    if labels is None:
        # Backward-compatible default: fall back to the notebook-level list.
        labels = task_labels

    hits = []
    reciprocal_ranks = []
    skipped = 0
    rev_index = {row: word for word, row in model.word_index.items()}
    embeddings = np.array(model.embeddings)

    for index, (tlab, (a, a_star, b, b_star_actual)) in enumerate(zip(labels, data)):
        if tlab != given_task_label:
            continue
        if b_star_actual not in model.word_index:
            skipped += 1
            continue
        b_star = compute_wv(model, a, a_star, b)
        if b_star is None:
            # One of the query words is missing from the vocabulary.
            skipped += 1
            continue

        # cos_csim is the similarity helper defined in this notebook.
        scores = cos_csim(embeddings, b_star)
        # Stable argsort on the negated scores ranks rows by decreasing
        # similarity and keeps row order among ties.
        order = np.argsort(-scores, kind="stable").tolist()
        # exclude these
        excluded = {a, a_star, b}
        results = [
            word
            for word in (rev_index[row] for row in order)
            if word not in excluded
        ]

        # Guard against an empty ranking (vocabulary made only of query words).
        hits.append(1 if results and results[0] == b_star_actual else 0)
        reciprocal_ranks.append(reciprocal_rank(b_star_actual, results))

        if index % 100 == 0:
            print(index)

    if not hits:
        # Nothing was evaluated: report it instead of dividing by zero.
        print("No analogies evaluated for task {!r} ({} skipped)".format(
            given_task_label, skipped))
        return

    accuracy = sum(hits) / len(hits)
    print("Accuracy: {}, MRR: {}".format(accuracy, np.mean(reciprocal_ranks)))
    print("Skipped: {}".format(skipped))
        
# Evaluate the model loaded from "bow2.words" on the "capital-common-countries" task
evaluate_model(bow2_sim, analogy_data, "capital-common-countries")

0
100
200
300
400


In [None]:
# Same evaluation, restricted to the "family" task
evaluate_model(bow2_sim, analogy_data, "family")

In [None]:
# Same evaluation, restricted to the "currency" task
evaluate_model(bow2_sim, analogy_data, "currency")

In [None]:
# Same evaluation, restricted to the "gram9-plural-verbs" task
evaluate_model(bow2_sim, analogy_data, "gram9-plural-verbs")