# A simple implementation of skipgrams with negative sampling
Author: Pierre Nugues

Adapted from _Distributed Representations of Words and Phrases and their Compositionality_, Sect. 2.2, by Mikolov et al. 2013.

The imports

In [1]:
import tensorflow as tf
from tensorflow.keras import backend
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Embedding, Lambda, Average, GlobalAveragePooling1D, Dot, Input, Reshape, Activation
import regex as re
import os
from tensorflow.keras.utils import to_categorical
import numpy as np
from scipy.spatial.distance import cosine
from tqdm import tqdm
from random import shuffle, randint
from collections import Counter
import math, random

## Parameters

The embedding size, context size, and negative counts

In [2]:
# Dimension of the word embedding vectors
embedding_dim = 100
# Number of context words taken on each side of the center word
w_size = 2
# Width of the full window: left context + center word + right context
c_size = w_size * 2 + 1
# Number of negative samples drawn for each positive (word, context) pair
K_NEG = 5
# Threshold used in the discard probability of the frequent-word subsampling
t = 1e-3
# Exponent applied to the word counts to build the negative-sampling distribution
power = 0.75

## The Corpus

We select a dataset and choose whether to run the notebook locally or on Colab

In [3]:
dataset = 'dickens'  # One of: 'homer', 'dickens', 'selma', 'big'
colab = False # True to run on Google Colab, False to run on a local machine
debug = False  # NOTE(review): not referenced in the cells shown here; confirm it is still needed
DOWNSAMPLING = False  # If True, the subsampled word sequence replaces the original one

In [4]:
# Root folder of the corpora: Google Drive on Colab, a relative path locally
BASE_PATH = '/content/drive/My Drive/Colab Notebooks/' if colab else '../../../'

In [5]:
# On Colab, mount Google Drive under /content/drive, the folder BASE_PATH points into
if colab:
    from google.colab import drive
    drive.mount('/content/drive')

We read the files from a folder

In [6]:
def get_files(dir, suffix):
    """
    Lists the entries of a folder whose names end with a given suffix
    :param dir: the folder to scan
    :param suffix: the ending a name must have to be kept
    :return: the list of matching names, without the folder part
    """
    return [name for name in os.listdir(dir) if name.endswith(suffix)]


def load_corpus(path):
    """
    Reads all the files ending with 'txt' in a folder and concatenates their content
    :param path: the folder containing the corpus files
    :return: the concatenated text of the files
    """
    # os.path.join() works whether or not path ends with a separator
    files = [os.path.join(path, file) for file in get_files(path, 'txt')]
    print(files)
    texts = []
    for file in files:
        # Explicit encoding, as for the Homer files, so that the result does not
        # depend on the locale; the context manager closes each file after reading
        with open(file, encoding='utf-8') as f:
            texts.append(f.read())
    # A single join instead of repeated string concatenations
    return ''.join(texts)

In [7]:
# Load the selected corpus in text and set the words used to inspect the embeddings
if dataset == 'homer':
    #text = 'Sing, O goddess, the anger of Achilles son of Peleus'.lower()
    # Context managers so that the two files are closed once read
    with open(BASE_PATH + 'corpus/iliad.mb.txt', encoding='utf-8') as f:
        text1 = f.read().lower()
    with open(BASE_PATH + 'corpus/odyssey.mb.txt', encoding='utf-8') as f:
        text2 = f.read().lower()
    text = text1 + text2
    test_words = ['he', 'she', 'ulysses', 'penelope', 'achaeans', 'trojans']
elif dataset == 'dickens':
    path = BASE_PATH + 'corpus/Dickens/'
    text = load_corpus(path)
    test_words = ['he', 'she', 'paris', 'london', 'table', 'rare', 'monday', 'sunday', 'man', 'woman', 'king', 'queen', 'boy',
                  'girl']
elif dataset == 'selma':
    path = BASE_PATH + 'corpus/Selma/'
    text = load_corpus(path)
    test_words = ['han', 'hon', 'att', 'bord', 'bordet', 'måndag', 'söndag', 'man', 'kvinna', 'kung', 'drottning',
                  'pojke', 'flicka']
elif dataset == 'big':
    path = BASE_PATH + 'corpus/Dickens/'
    text = load_corpus(path)
    path = BASE_PATH + 'corpus/Norvig/'
    text += load_corpus(path)
    test_words = ['he', 'she', 'paris', 'london', 'table', 'rare', 'monday', 'sunday', 'man', 'woman', 'king', 'queen', 'boy',
                  'girl']
else:
    # Fail here rather than later with an undefined (or stale) text variable
    raise ValueError(f"Unknown dataset: {dataset!r}")

['../../../corpus/Dickens/Hard Times.txt', '../../../corpus/Dickens/Oliver Twist.txt', '../../../corpus/Dickens/Great Expectations.txt', '../../../corpus/Dickens/The Old Curiosity Shop.txt', '../../../corpus/Dickens/A Tale of Two Cities.txt', '../../../corpus/Dickens/Dombey and Son.txt', '../../../corpus/Dickens/The Pickwick Papers.txt', '../../../corpus/Dickens/Bleak House.txt', '../../../corpus/Dickens/Our Mutual Friend.txt', '../../../corpus/Dickens/The Mystery of Edwin Drood.txt', '../../../corpus/Dickens/Nicholas Nickleby.txt', '../../../corpus/Dickens/David Copperfield.txt', '../../../corpus/Dickens/Little Dorrit.txt', '../../../corpus/Dickens/A Christmas Carol in Prose.txt']


## Processing the Corpus

### Tokenizing

We set all the text in lowercase and extract the words as sequences of letters

In [8]:
text = text.lower()
# Raw string: \p is not a valid Python escape sequence in a normal string.
# \p{L}+ (regex module syntax) matches a sequence of Unicode letters
word_seq = re.findall(r'\p{L}+', text)
word_seq[:5]

['hard', 'times', 'and', 'reprinted', 'pieces']

### Downsampling

We can downsample the frequent words. We first count the words. We will have to count them again after sampling

In [9]:
counts = Counter(word_seq)

In [10]:
counts['the']

157339

In [11]:
counts['penelope']

0

In [12]:
# Copy of the word counts, to be turned into discard probabilities
discard_probs = dict(counts)
# Total number of tokens in the word sequence
word_cnt = sum(discard_probs.values())

In [13]:
# Subsampling of frequent words: a word with relative frequency f is discarded
# with probability max(0, 1 - sqrt(t / f)).
# The probabilities are rebuilt from the counts instead of being updated in place,
# so that running this cell a second time yields the same values
discard_probs = {word: max(0, 1 - math.sqrt(t / (cnt / word_cnt)))
                 for word, cnt in counts.items()}

In [14]:
# discard_probs['the']

In [15]:
# discard_probs['he']

In [16]:
# A word is kept when a uniform draw in [0, 1) exceeds its discard probability
subsampled_word_seq = [word for word in word_seq
                       if discard_probs[word] < np.random.random()]

In [17]:
# When downsampling is enabled, the subsampled sequence replaces the original one
if DOWNSAMPLING:
    word_seq = subsampled_word_seq

### Counting the words

In [18]:
counts = Counter(word_seq)

In [19]:
counts['the']

157339

In [20]:
counts['penelope']

0

In [21]:
word_cnt = sum(counts.values())
word_cnt

3355456

We extract the unique words

In [22]:
# Iterating over the counter yields its keys, i.e. the distinct words
unique_words = sorted(counts)
unique_words[:10]

['a',
 'aaron',
 'aback',
 'abaft',
 'abandon',
 'abandoned',
 'abandoning',
 'abandonment',
 'abandons',
 'abase']

In [23]:
vocab_size = len(unique_words)
vocab_size

35221

### Indices

And we create indices

In [24]:
# Two-way mapping between the words and their ranks in the sorted vocabulary
idx2word = dict(enumerate(unique_words))
word2idx = {word: i for i, word in idx2word.items()}
#word2idx

We map the words to their indices and we get the sequence of word indices

In [25]:
widx_seq = [word2idx.get(word) for word in word_seq]
widx_seq[:5]

[14222, 31182, 1071, 25401, 22543]

### Power transform

We apply a power transform to the word counts and we return the power-transformed probabilities

In [26]:
def power_transform(counts, power):
    """
    Raises the counts to a power and normalizes the results so that they sum to one
    :param counts: a mapping from words to their counts
    :param power: the exponent applied to each count
    :return: a dictionary mapping each word to its transformed probability
    """
    weights = {word: math.pow(cnt, power) for word, cnt in counts.items()}
    total = sum(weights.values())
    return {word: weight / total for word, weight in weights.items()}

In [27]:
trfmd_probs = power_transform(counts, power)

We build the index and probability lists for the random choice function

In [28]:
trfmd_probs_idx = {word2idx[k]: v for k, v in trfmd_probs.items()}

In [29]:
draw_idx, probs = zip(*trfmd_probs_idx.items())

Drawing words

In [30]:
random.choices(draw_idx, weights=probs, k=K_NEG * 2 * w_size)

[25126,
 16334,
 20392,
 21249,
 25901,
 6318,
 34385,
 34357,
 34385,
 4,
 22204,
 29397,
 26336,
 17873,
 21106,
 20911,
 20895,
 3703,
 903,
 1224]

## The pairs

For all the words, we form positive and negative pairs. We extract the context words of a word from its neighbors in the word sequence to form the positive pairs and at random to form the negative ones.

In [31]:
def minibatch_generator(widx_seq):
    """
    Generates one minibatch for each word of the sequence that has a full context window.
    A batch consists of the positive pairs generated by a word and its context
    and the negative pairs: w_size * 2 + K_NEG * w_size * 2 = (K_NEG + 1) * w_size * 2
    :param widx_seq: the sequence of word indices
    :return: yields (X_i, X_c, y), where X_i is the center word repeated for each pair,
             X_c contains the context words followed by the negative samples,
             and y the labels: 1 for the context words, 0 for the negative samples
    """
    from itertools import accumulate

    if w_size < 1:
        # No context window, hence no pair to generate
        return
    n_pos = 2 * w_size
    n_neg = K_NEG * n_pos
    # The sampling distribution is the same for all the batches: we accumulate the
    # weights once instead of letting random.choices() do it again for every word.
    # The draws are the same as with weights=probs
    cum_probs = list(accumulate(probs))
    labels = [1] * n_pos + [0] * n_neg
    # Iterating on a range avoids a copy of the sequence and gives tqdm a total
    for idx in tqdm(range(w_size, len(widx_seq) - w_size)):
        X_i = [widx_seq[idx]] * (n_pos + n_neg)
        # Left context: idx - w_size -> idx and right context: idx + 1 -> idx + w_size
        X_c = [*widx_seq[idx - w_size:idx], *widx_seq[idx + 1:idx + w_size + 1]]
        X_c += random.choices(draw_idx, cum_weights=cum_probs, k=n_neg)
        yield np.array(X_i), np.array(X_c), np.array(labels)

In [32]:
# Old version
"""def minibatch_generator(widx_seq):
    # A batch consists of the positive pairs generated by a word and its context
    # and the negative pairs: w_size * 2 + K_NEG * w_size * 2 = (K_NEG + 1) * w_size * 2
    for idx, widx in tqdm(enumerate(widx_seq[w_size:-w_size], w_size)):
        positive_pairs = []
        negative_pairs = []
        # We create the start and end indices as in range(start, end)
        start_idx = idx - w_size
        end_idx = idx + w_size + 1
        # We create pairs from the left context: start_idx -> idx and from the right context idx + 1 -> end_idx
        X_i = [widx_seq[idx]] * (K_NEG + 1) * 2 * w_size
        X_c = [widx_seq[c_idx] for c_idx in [*range(start_idx, idx), *range(idx + 1, end_idx)]]
        X_c += [widx_seq[randint(0, len(widx_seq) - 1)] for _ in range(K_NEG * 2 * w_size)]
        y = [1] * w_size * 2 + [0] * w_size * 2 * K_NEG
        y = np.array(y)
        X_i = np.array(X_i)
        X_c = np.array(X_c)
        yield X_i, X_c, y"""

'def minibatch_generator(widx_seq):\n    # A batch consists of the positive pairs generated by a word and its context\n    # and the negative pairs: w_size * 2 + K_NEG * w_size * 2 = (K_NEG + 1) * w_size * 2\n    for idx, widx in tqdm(enumerate(widx_seq[w_size:-w_size], w_size)):\n        positive_pairs = []\n        negative_pairs = []\n        # We create the start and end indices as in range(start, end)\n        start_idx = idx - w_size\n        end_idx = idx + w_size + 1\n        # We create pairs from the left context: start_idx -> idx and from the right context idx + 1 -> end_idx\n        X_i = [widx_seq[idx]] * (K_NEG + 1) * 2 * w_size\n        X_c = [widx_seq[c_idx] for c_idx in [*range(start_idx, idx), *range(idx + 1, end_idx)]]\n        X_c += [widx_seq[randint(0, len(widx_seq) - 1)] for _ in range(K_NEG * 2 * w_size)]\n        y = [1] * w_size * 2 + [0] * w_size * 2 * K_NEG\n        y = np.array(y)\n        X_i = np.array(X_i)\n        X_c = np.array(X_c)\n        yield X_i,

In [33]:
# Old old version
"""def minibatch_generator(widx_seq):
    # A batch consists of the positive pairs generated by a word and its context
    # and the negative pairs: w_size * 2 + K_NEG * w_size * 2 = (K_NEG + 1) * w_size * 2
    for idx, widx in tqdm(enumerate(widx_seq)):
        positive_pairs = []
        negative_pairs = []
        # We create the start and end indices as in range(start, end)
        start_idx = max(0, idx - w_size)
        end_idx = min(idx + w_size + 1, len(widx_seq))
        # We create pairs from the left context: start_idx -> idx and from the right context idx + 1 -> end_idx
        for c_idx in [*range(start_idx, idx), *range(idx + 1, end_idx)]:
            positive_pairs += [(widx_seq[idx], widx_seq[c_idx])]
            negative_pairs += [(widx_seq[idx], widx_seq[randint(0, len(widx_seq) - 1)]) for _ in range(K_NEG)]
        pairs = positive_pairs + negative_pairs
        X = np.array(pairs)
        X_i = X[:, 0]
        X_c = X[:, 1] 
        y = [1] * len(positive_pairs) + [0] * len(negative_pairs)
        y = np.array(y)
        yield X_i, X_c, y"""

'def minibatch_generator(widx_seq):\n    # A batch consists of the positive pairs generated by a word and its context\n    # and the negative pairs: w_size * 2 + K_NEG * w_size * 2 = (K_NEG + 1) * w_size * 2\n    for idx, widx in tqdm(enumerate(widx_seq)):\n        positive_pairs = []\n        negative_pairs = []\n        # We create the start and end indices as in range(start, end)\n        start_idx = max(0, idx - w_size)\n        end_idx = min(idx + w_size + 1, len(widx_seq))\n        # We create pairs from the left context: start_idx -> idx and from the right context idx + 1 -> end_idx\n        for c_idx in [*range(start_idx, idx), *range(idx + 1, end_idx)]:\n            positive_pairs += [(widx_seq[idx], widx_seq[c_idx])]\n            negative_pairs += [(widx_seq[idx], widx_seq[randint(0, len(widx_seq) - 1)]) for _ in range(K_NEG)]\n        pairs = positive_pairs + negative_pairs\n        X = np.array(pairs)\n        X_i = X[:, 0]\n        X_c = X[:, 1] \n        y = [1] * len(pos

We build two inputs: The left input is the input word and the right one is a context word.

## The Architecture

And now the architecture

In [34]:
# The input (center) word and its embedding table.
# input_length is no longer passed: it is deprecated in Keras 3 and the
# sequence length is already given by the shape of the Input layer
i_word = Input(shape=(1,))
i_embedding = Embedding(vocab_size, embedding_dim)(i_word)

# The context word, with an embedding table of its own
c_word = Input(shape=(1,))
c_embedding = Embedding(vocab_size, embedding_dim)(c_word)

# With normalize=True, the dot product is the cosine of the two embeddings
dot_prod = Dot(axes=-1, normalize=True)([i_embedding, c_embedding])
# Logistic regression on the cosine: probability that the pair is a positive one
output = Dense(1, activation='sigmoid')(dot_prod)
model = Model([i_word, c_word], output)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 1, 100)       3522100     input_1[0][0]                    
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 1, 100)       3522100     input_2[0][0]                    
______________________________________________________________________________________________

## Training the Model

The function to measure the vector similarity

In [35]:
def most_sim_vecs(vector, U, nbr_words=10):
    """
    Ranks the rows of a matrix by their cosine distance to a vector
    :param vector: the query vector
    :param U: the matrix whose rows are compared with the vector
    :param nbr_words: the number of row indices to return
    :return: the indices of the closest rows, skipping the first of the ranking
    """
    # We use the cosine distance and not the cosine:
    # 0 between equal vectors and 2 at most. All-zero rows are set to 2
    distances = []
    for i in range(U.shape[0]):
        row = U[i, :]
        distances.append(cosine(vector, row) if np.any(row) else 2)
    ranking = sorted(range(len(distances)), key=distances.__getitem__)
    # The slice starts at 1 to leave out the closest row
    return ranking[1:nbr_words + 1]

## The Loss

In [36]:
def loss_neg(y_true, y_hat):
    """
    Negative log-likelihood of the positive and negative pairs,
    summed (and not averaged) over the batch
    :param y_true: the labels: 1 for a positive pair and 0 for a negative one
    :param y_hat: the predicted probabilities that the pairs are positive
    :return: a scalar tensor, the sum of the binary cross-entropies
    """
    y_true = tf.cast(y_true, tf.float32)
    # A saturated sigmoid outputs exactly 0 or 1 and log(0) = -inf would turn
    # the loss into inf or nan (0 * -inf): we keep y_hat away from both bounds
    eps = backend.epsilon()
    y_hat = tf.clip_by_value(y_hat, eps, 1.0 - eps)
    log_likelihood = (y_true * tf.math.log(y_hat)
                      + (1.0 - y_true) * tf.math.log(1.0 - y_hat))
    return -tf.reduce_sum(log_likelihood)

In [37]:
model.compile(loss=loss_neg, optimizer='rmsprop')#, run_eagerly=True)

In [38]:
EPOCHS = 2

In [None]:
for epoch in range(EPOCHS):
    print(f"Epoch {epoch + 1}/{EPOCHS}")
    # One gradient step per minibatch, i.e. per center word
    for X_i, X_c, y in minibatch_generator(widx_seq):
        loss = model.train_on_batch([X_i, X_c], y)
    # Loss of the last minibatch of the epoch
    print('Loss:', loss)
    # First weight array of the model, presumably the embeddings of the input words
    vectors = model.get_weights()[0]
    most_sim_words = {}
    for w in test_words:
        neighbor_ids = most_sim_vecs(vectors[word2idx[w]], vectors)
        most_sim_words[w] = [idx2word.get(j) for j in neighbor_ids]
        print(w, most_sim_words[w])

0it [00:00, ?it/s]

Epoch 1/2


369373it [2:01:59, 50.43it/s]

In [None]:
"""Loss: 0.4462618827819824
he ['she', 'they', 'it', 'ulysses', 'amopaon', 'i', 'bringing', 'lifting', 'achilles', 'manage']
she ['he', 'ulysses', 'minerva', 'they', 'i', 'telemachus', 'achilles', 'bringing', 'vulcan', 'glad']
ulysses ['telemachus', 'penelope', 'achilles', 'menelaus', 'she', 'minerva', 'agamemnon', 'hector', 'jove', 'apollo']
penelope ['telemachus', 'ulysses', 'dear', 'jove', 'father', 'husband', 'mother', 'menelaus', 'wife', 'unhappy']
achaeans ['trojans', 'others', 'gods', 'danaans', 'outer', 'suitors', 'argives', 'gates', 'other', 'ships']

12it [00:00, 113.77it/s]

trojans ['suitors', 'achaeans', 'sea', 'others', 'winds', 'battlements', 'corpses', 'ships', 'night', 'argives']
Epoch 2/10

272712it [39:46, 114.27it/s]

Loss: 0.4378413259983063
he ['she', 'they', 'achilles', 'it', 'i', 'somehow', 'mouthed', 'healed', 'gortyn', 'we']
she ['he', 'minerva', 'they', 'telemachus', 'ulysses', 'i', 'sun', 'sidon', 'achilles', 'eumaeus']
ulysses ['telemachus', 'penelope', 'menelaus', 'achilles', 'agamemnon', 'minerva', 'ajax', 'hector', 'neptune', 'alcinous']
penelope ['telemachus', 'eumaeus', 'euryclea', 'ulysses', 'father', 'dear', 'mother', 'juno', 'unhappy', 'alcinous']
achaeans ['trojans', 'gods', 'others', 'suitors', 'danaans', 'phaeacians', 'we', 'argives', 'corpses', 'dogs']

13it [00:00, 117.94it/s]

trojans ['achaeans', 'others', 'suitors', 'argives', 'themselves', 'ships', 'sea', 'corpses', 'night', 'battlements']
Epoch 3/10
"""

In [None]:
"""Loss: 9.454566955566406
10 iter.
he ['they', 'she', 'minerva', 'adraste', 'fee', 'parleying', 'i', 'harden', 'have', 'seat']
she ['they', 'he', 'alcyone', 'minerva', 'helen', 'fickle', 'juno', 'sunium', 'heaven', 'penelope']
ulysses ['telemachus', 'penelope', 'antinous', 'menelaus', 'aegisthus', 'mentor', 'eurymachus', 'alcinous', 'theoclymenus', 'mercury']
penelope ['nausicaa', 'telemachus', 'madam', 'antinous', 'father', 'euryclea', 'mercury', 'menelaus', 'noemon', 'mentor']
achaeans ['trojans', 'argives', 'suitors', 'gods', 'danaans', 'ships', 'sea', 'others', 'other', 'ground']
trojans ['achaeans', 'argives', 'suitors', 'gods', 'ships', 'ground', 'sea', 'danaans', 'others', 'wall']
"""

In [None]:
"""
neg_loss and 3 epochs
he ['she', 'corrections', 'quainter', 'tortuously', 'they', 'disgusts', 'legislature', 'decorously', 'applicants', 'topple']
she ['he', 'florence', 'they', 'wastin', 'underhanded', 'everybody', 'powerfully', 'estella', 'bella', 'nobody']
paris ['france', 'england', 'requisition', 'despair', 'italy', 'amazement', 'parliament', 'dust', 'convulsions', 'lincolnshire']
london ['england', 'town', 'italy', 'france', 'vain', 'paris', 'lincolnshire', 'buckingham', 'itself', 'due']
table ['window', 'fire', 'ground', 'wall', 'floor', 'sofa', 'box', 'road', 'carriage', 'desk']
rare ['female', 'singular', 'monstrous', 'special', 'mere', 'moral', 'terrible', 'genteel', 'common', 'remarkable']
monday ['wednesday', 'horseback', 'floor', 'thursday', 'board', 'tiptoe', 'sides', 'entering', 'saturday', 'sunday']
sunday ['saturday', 'summer', 'post', 'coffee', 'board', 'stone', 'monday', 'wall', 'green', 'garden']
man ['gentleman', 'woman', 'lady', 'person', 'boy', 'child', 'girl', 'dog', 'fellow', 'creature']
woman ['lady', 'girl', 'gentleman', 'creature', 'man', 'fellow', 'person', 'boy', 'child', 'servant']
king ['clergyman', 'maker', 'baker', 'cook', 'pawnbroker', 'chandler', 'attorney', 'bride', 'knights', 'mayor']
aqueen ['forest', 'tumult', 'pile', 'roast', 'professor', 'soot', 'combination', 'mixture', 'bunch', 'depression']
boy ['girl', 'child', 'lady', 'servant', 'woman', 'gentleman', 'doctor', 'baby', 'house', 'fellow']"""

In [None]:
"""
neg_loss and 4 epochs, downsampling
Loss: 9.66569995880127
he ['cunningly', 'nigh', 'quick', 'trachis', 'despairingly', 'lesson', 'withdrew', 'she', 'keenly', 'bewail']
she ['attempt', 'overtakes', 'aloud', 'piloted', 'gush', 'hawk', 'he', 'nausicaa', 'meliboea', 'thrash']
ulysses ['swineherd', 'tried', 'indisputable', 'happen', 'invincible', 'arrive', 'accompanied', 'stains', 'cyclopes', 'rouse']
penelope ['nurse', 'desire', 'telemachus', 'madam', 'stockman', 'angrily', 'reminds', 'eurynome', 'troubled', 'alas']
achaeans ['danaans', 'argives', 'oracles', 'faint', 'pick', 'avail', 'knowledge', 'useful', 'people', 'unharnessed']
trojans ['wander', 'perilous', 'erembians', 'maniac', 'evils', 'undaunted', 'argives', 'thwart', 'violate', 'warding']

"""

In [None]:
"""
neg_loss and 4 epochs POW = 0.75?
he ['she', 'they', 'achilles', 'i', 'bride', 'halt', 'ruined', 'mars', 'solitary', 'muttering']
she ['minerva', 'he', 'they', 'handmaids', 'helen', 'heaven', 'maids', 'venus', 'wand', 'steps']
ulysses ['telemachus', 'eumaeus', 'aegisthus', 'alcinous', 'antinous', 'mercury', 'penelope', 'menelaus', 'nestor', 'eurymachus']
penelope ['antinous', 'eumaeus', 'telemachus', 'nurse', 'dear', 'alcinous', 'piteously', 'euryclea', 'mercury', 'leto']
achaeans ['danaans', 'trojans', 'argives', 'gods', 'others', 'suitors', 'phaeacians', 'sea', 'alone', 'closely']
trojans ['achaeans', 'argives', 'danaans', 'gods', 'others', 'sea', 'earth', 'hindered', 'themselves', 'suitors']

Loss: 9.882923126220703 POW = 1
he ['she', 'they', 'key', 'i', 'we', 'destroys', 'achilles', 'glares', 'dearest', 'mercury']
she ['he', 'minerva', 'they', 'penelope', 'melanthius', 'apollo', 'mercury', 'i', 'telemachus', 'alcyone']
ulysses ['telemachus', 'antinous', 'achilles', 'penelope', 'agamemnon', 'eumaeus', 'minerva', 'piteously', 'euryclea', 'ajax']
penelope ['telemachus', 'nurse', 'antinous', 'eumaeus', 'euryclea', 'juno', 'dear', 'apollo', 'thetis', 'menelaus']
achaeans ['trojans', 'danaans', 'argives', 'others', 'suitors', 'gods', 'phaeacians', 'myrmidons', 'immortals', 'lycians']
trojans ['achaeans', 'argives', 'danaans', 'others', 'suitors', 'gods', 'lycians', 'myrmidons', 'themselves', 'driven']

Loss: 9.43042278289795 POW = 1
he ['they', 'she', 'healed', 'dagger', 'i', 'prisoner', 'we', 'apollo', 'horsehair', 'craftiest']
she ['minerva', 'he', 'juno', 'they', 'venus', 'courtesy', 'maids', 'veil', 'penelope', 'iris']
ulysses ['telemachus', 'agamemnon', 'achilles', 'eumaeus', 'neptune', 'nestor', 'menelaus', 'alcinous', 'idomeneus', 'penelope']
penelope ['eumaeus', 'nurse', 'antinous', 'telemachus', 'euryclea', 'juno', 'neptune', 'alcinous', 'eurymachus', 'mother']
achaeans ['trojans', 'argives', 'danaans', 'suitors', 'gods', 'others', 'phaeacians', 'themselves', 'fall', 'nothing']
trojans ['achaeans', 'argives', 'danaans', 'dogs', 'themselves', 'others', 'suitors', 'fall', 'sooner', 'gods']

Loss: 11.508221626281738 POW = 0.75
he ['they', 'she', 'offal', 'gorgon', 'place', 'undisturbed', 'prizes', 'hind', 'seat', 'steaks']
she ['minerva', 'juno', 'vixen', 'arete', 'heron', 'he', 'they', 'beguiled', 'venus', 'moody']
ulysses ['mercury', 'telemachus', 'penelope', 'antinous', 'leto', 'alcinous', 'swineherd', 'idomeneus', 'eumaeus', 'diomed']
penelope ['euryclea', 'eumaeus', 'antinous', 'nurse', 'leto', 'alcinous', 'mentor', 'telemachus', 'dear', 'juno']
achaeans ['argives', 'trojans', 'danaans', 'gods', 'suitors', 'others', 'ships', 'fighting', 'day', 'battle']
trojans ['achaeans', 'argives', 'danaans', 'ships', 'themselves', 'lycians', 'ground', 'sea', 'heroes', 'dogs']
Loss: 9.757258415222168 POW = 0.75
he ['they', 'she', 'it', 'dipping', 'targets', 'journey', 'devil', 'sunium', 'safely', 'democoon']
she ['they', 'he', 'minerva', 'simple', 'juno', 'maids', 'slipped', 'calypso', 'husband', 'overboard']
ulysses ['penelope', 'telemachus', 'antinous', 'menelaus', 'glad', 'alcinous', 'eumaeus', 'euryclea', 'eurymachus', 'nestor']
penelope ['telemachus', 'antinous', 'eumaeus', 'alcinous', 'nurse', 'euryclea', 'ulysses', 'thetis', 'mercury', 'dear']
achaeans ['danaans', 'trojans', 'argives', 'suitors', 'gods', 'others', 'immortals', 'sea', 'ships', 'myrmidons']
trojans ['achaeans', 'danaans', 'argives', 'ships', 'suitors', 'earth', 'fighting', 'sea', 'themselves', 'immortals']

Loss: 7.38845682144165 POW = 0.75 dot norm
he ['she', 'they', 'minerva', 'scoundrel', 'i', 'achilles', 'we', 'apollo', 'ineffable', 'exposed']
she ['he', 'minerva', 'juno', 'sun', 'they', 'phemius', 'venus', 'euryalus', 'penelope', 'apollo']
ulysses ['achilles', 'telemachus', 'antinous', 'diomed', 'swineherd', 'antilochus', 'penelope', 'agamemnon', 'hector', 'menelaus']
penelope ['euryclea', 'telemachus', 'dear', 'nurse', 'alcinous', 'answered', 'said', 'queen', 'o', 'eumaeus']
achaeans ['trojans', 'danaans', 'argives', 'gods', 'suitors', 'others', 'immortals', 'sun', 'phaeacians', 'lycians']
trojans ['achaeans', 'danaans', 'argives', 'others', 'lycians', 'dogs', 'suitors', 'immortals', 'gods', 'main']

Loss: 9.18224811553955 POW = 0.75 dot norm
he ['she', 'they', 'i', 'aim', 'penelope', 'glad', 'antinous', 'minerva', 'sun', 'adrestus']
she ['he', 'minerva', 'juno', 'sarpedon', 'sun', 'they', 'venus', 'antinous', 'obeyed', 'maids']
ulysses ['antinous', 'swineherd', 'ajax', 'menelaus', 'diomed', 'eurymachus', 'agamemnon', 'alcinous', 'eumaeus', 'achilles']
penelope ['dear', 'father', 'eumaeus', 'telemachus', 'euryclea', 'nurse', 'piteously', 'mother', 'neighbour', 'antinous']
achaeans ['danaans', 'trojans', 'gods', 'argives', 'others', 'phaeacians', 'immortals', 'suitors', 'we', 'birds']
trojans ['achaeans', 'argives', 'danaans', 'suitors', 'phaeacians', 'themselves', 'lycians', 'immortals', 'myrmidons', 'gods']
"""

In [None]:
"""
Loss: 0.3799111843109131
cross entropy, 4 epochs
he ['she', 'permitted', 'they', 'maris', 'accustomed', 'swooping', 'curiously', 'abydos', 'kneading', 'eilesium']
she ['minerva', 'they', 'juno', 'he', 'her', 'venus', 'empty', 'iris', 'husband', 'permitted']
ulysses ['penelope', 'telemachus', 'alcinous', 'eurymachus', 'eumaeus', 'antinous', 'mercury', 'leto', 'nausicaa', 'euryclea']
penelope ['eumaeus', 'euryclea', 'telemachus', 'antinous', 'dear', 'nausicaa', 'alcinous', 'nurse', 'eurymachus', 'ulysses']
achaeans ['trojans', 'danaans', 'others', 'argives', 'gods', 'sea', 'ships', 'suitors', 'forever', 'yourselves']
trojans ['achaeans', 'argives', 'danaans', 'ground', 'others', 'gods', 'sea', 'suitors', 'ships', 'earth']

Loss: 0.42960622906684875
he ['she', 'they', 'cooling', 'we', 'wain', 'i', 'pretty', 'morn', 'reeds', 'scream']
she ['he', 'minerva', 'they', 'achilles', 'calypso', 'iris', 'euryclea', 'thersites', 'mercury', 'maids']
ulysses ['telemachus', 'antinous', 'agamemnon', 'achilles', 'alcinous', 'penelope', 'menelaus', 'nestor', 'euryclea', 'eumaeus']
penelope ['telemachus', 'eumaeus', 'nurse', 'euryclea', 'dear', 'antinous', 'juno', 'alcinous', 'menelaus', 'sir']
achaeans ['argives', 'danaans', 'trojans', 'others', 'suitors', 'sun', 'gods', 'immortals', 'phaeacians', 'lycians']
trojans ['achaeans', 'argives', 'danaans', 'plain', 'themselves', 'break', 'grow', 'suitors', 'ships', 'mountains']

"""