# A simple implementation of skipgrams with negative sampling
Author: Pierre Nugues

Adapted from _Distributed Representations of Words and Phrases and their Compositionality_, Sect. 2.2, by Mikolov et al. 2013.

The imports

In [1]:
import tensorflow as tf
from tensorflow.keras import backend
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Embedding, Lambda, Average, GlobalAveragePooling1D, Dot, Input, Reshape, Activation
import regex as re
import os
from tensorflow.keras.utils import to_categorical
import numpy as np
from scipy.spatial.distance import cosine
from tqdm import tqdm
from random import shuffle, randint
from collections import Counter
import math, random

## Parameters

The embedding size, context size, and negative counts

In [2]:
# Dimension of the word vectors
embedding_dim = 100
# Number of context words taken on each side of the input word
w_size = 2
# Width of the full window: left context, input word, and right context
c_size = 2 * w_size + 1
# Number of negative pairs drawn for each positive pair
K_NEG = 5
# Threshold used in the discard probability of the frequent words
t = 0.001
# Exponent applied to the word counts to draw the negative words
power = 0.75

## The Corpus

We select a dataset and execute locally or on colab

In [3]:
dataset = 'homer'  # One of: 'homer', 'dickens', 'selma', 'big'
colab = False # True to run on colab, False to run on my machine
debug = False  # Not read by the cells shown in this notebook
DOWNSAMPLING = False  # True to replace the word sequence with its subsampled version

In [4]:
# Root folder of the corpora: the mounted drive on colab,
# a relative path on a local machine
BASE_PATH = '/content/drive/My Drive/Colab Notebooks/' if colab else '../../../'

In [5]:
if colab:
    # Imported in this branch as google.colab is only needed when colab is set
    from google.colab import drive
    # Mount point matching the root of the colab BASE_PATH
    drive.mount('/content/drive')

We read the files from a folder

In [6]:
def get_files(dir, suffix):
    """
    Returns all the files in a folder ending with suffix
    :param dir: the path of the folder to list
    :param suffix: the ending a file name must have to be kept, e.g. 'txt'
    :return: the list of file names (without the folder), sorted
    """
    # os.listdir() returns the entries in an arbitrary order. We sort them
    # so that the result is the same from one run or machine to the next
    return sorted(file for file in os.listdir(dir) if file.endswith(suffix))


def load_corpus(path):
    """
    Reads all the files ending with 'txt' in a folder and concatenates them
    :param path: the folder containing the corpus files
    :return: the content of all the files as one string
    """
    # os.path.join() works whether path ends with a separator or not
    files = [os.path.join(path, file) for file in get_files(path, 'txt')]
    print(files)
    texts = []
    for file in files:
        # The context manager closes the file once read. The encoding is
        # explicit so that decoding does not depend on the platform default
        with open(file, encoding='utf-8') as f:
            texts.append(f.read())
    # One join instead of repeated string concatenations
    return ''.join(texts)

In [7]:
# We load the selected corpus in text and we set the words, test_words,
# used to inspect the embeddings after each epoch
if dataset == 'homer':
    # The context managers close the files once read
    with open(BASE_PATH + 'corpus/iliad.mb.txt', encoding='utf-8') as f:
        text1 = f.read().lower()
    with open(BASE_PATH + 'corpus/odyssey.mb.txt', encoding='utf-8') as f:
        text2 = f.read().lower()
    text = text1 + text2
    test_words = ['he', 'she', 'ulysses', 'penelope', 'achaeans', 'trojans']
elif dataset == 'dickens':
    path = BASE_PATH + 'corpus/Dickens/'
    text = load_corpus(path)
    test_words = ['he', 'she', 'paris', 'london', 'table', 'rare', 'monday', 'sunday', 'man', 'woman', 'king', 'queen', 'boy',
                  'girl']
elif dataset == 'selma':
    path = BASE_PATH + 'corpus/Selma/'
    text = load_corpus(path)
    test_words = ['han', 'hon', 'att', 'bord', 'bordet', 'måndag', 'söndag', 'man', 'kvinna', 'kung', 'drottning',
                  'pojke', 'flicka']
elif dataset == 'big':
    path = BASE_PATH + 'corpus/Dickens/'
    text = load_corpus(path)
    path = BASE_PATH + 'corpus/Norvig/'
    text += load_corpus(path)
    test_words = ['he', 'she', 'paris', 'london', 'table', 'rare', 'monday', 'sunday', 'man', 'woman', 'king', 'queen', 'boy',
                  'girl']
else:
    # Without this branch, a misspelled name would leave text undefined
    # and only fail in a later cell with a NameError
    raise ValueError('Unknown dataset: ' + repr(dataset))

## Processing the Corpus

### Tokenizing

We set all the text in lowercase and we extract the words as sequences of letters

In [8]:
text = text.lower()
# Raw string: \p is not a valid Python escape sequence and must reach
# the regex engine unchanged. \p{L} matches any Unicode letter
word_seq = re.findall(r'\p{L}+', text)
word_seq[:5]

['book', 'i', 'the', 'quarrel', 'between']

### Downsampling

We can downsample the frequent words. We first count the words, then we discard randomly some words in the text, depending on their frequency. Frequent words will often be discarded. Rare words, never. We will have to count them again after sampling.

In [9]:
# Frequency of each word and total number of words in the sequence
counts = Counter(word_seq)
word_cnt = len(word_seq)
word_cnt

272712

In [10]:
# Counts of a very frequent, a frequent, and a rarer word
tuple(counts[word] for word in ('the', 'he', 'penelope'))

(15905, 4746, 110)

The discard probability threshold, following § 2.3 of the paper

In [11]:
# Discard probability of a word: 1 - sqrt(t / relative frequency),
# floored at 0 for the words whose relative frequency is below t
discard_probs = {
    word: max(0, 1 - math.sqrt(t / (cnt / word_cnt)))
    for word, cnt in counts.items()
}

In [12]:
# discard_probs is a plain dict: .get() shows None instead of raising
# a KeyError when a probe word is absent from the selected corpus
tuple(discard_probs.get(word) for word in ('the', 'he', 'penelope'))

(0.8690560952429589, 0.7602888379452187, 0)

In [13]:
# We keep a word when a uniform draw in [0, 1) exceeds its discard probability
subsampled_word_seq = [word for word in word_seq
                       if np.random.random() > discard_probs[word]]

In [14]:
# The subsampled sequence replaces the original one only if requested
word_seq = subsampled_word_seq if DOWNSAMPLING else word_seq

### Counting the words

In [15]:
# We count again as the sequence may have been subsampled
counts = Counter(word_seq)
word_cnt = len(word_seq)
word_cnt

272712

In [16]:
# Counts of the probe words after the optional subsampling
tuple(counts[word] for word in ('the', 'he', 'penelope'))

(15905, 4746, 110)

In [17]:
# Relative frequencies of the probe words
tuple(counts[word] / word_cnt for word in ('the', 'he', 'penelope'))

(0.05832159934289653, 0.01740297456657573, 0.00040335592126492415)

We extract the unique words

In [18]:
# Iterating on a Counter yields its keys: the vocabulary in sorted order
unique_words = sorted(counts)
unique_words[:10]

['a',
 'abantes',
 'abarbarea',
 'abas',
 'abate',
 'abated',
 'abetting',
 'abhorred',
 'abians',
 'abide']

In [19]:
# Number of distinct words: the input dimension of the embedding layers
vocab_size = len(unique_words)
vocab_size

9725

### Indices

And we create indices

In [20]:
# A word is indexed by its rank in the sorted vocabulary
idx2word = dict(enumerate(unique_words))
word2idx = {word: idx for idx, word in idx2word.items()}

We map the words to their indices and we get the sequence of word indices

In [21]:
# The corpus as a sequence of word indices
widx_seq = [word2idx.get(word) for word in word_seq]
widx_seq[:5]

[1037, 4334, 8518, 6666, 897]

### Power transform

We apply a power transform to the word counts and we return the power-transformed probabilities:
$$
\frac{\text{cnt}(w)^\text{power}}{\sum_i \text{cnt}(w_i)^\text{power}}
$$

In [22]:
def power_transform(counts, power):
    """
    Raises the counts to a power and normalizes them so that they sum to one
    :param counts: a mapping from the words to their counts
    :param power: the exponent applied to each count
    :return: a dictionary mapping the words to their transformed probabilities
    """
    weights = {word: math.pow(cnt, power) for word, cnt in counts.items()}
    total = sum(weights.values())
    return {word: weight / total for word, weight in weights.items()}

In [23]:
# Probabilities used to draw the negative words: the counts raised to power
trfmd_probs = power_transform(counts, power)

In [24]:
# trfmd_probs is a plain dict: .get() shows None instead of raising
# a KeyError when a probe word is absent from the selected corpus
tuple(trfmd_probs.get(word) for word in ('the', 'he', 'penelope'))

(0.020224400021262735, 0.008165282068782646, 0.00048503145644814705)

### Negative sampling
For each positive pair, consisting of a word and a context word, we draw $k$ words randomly to form negative pairs.

We build the index and probability lists for the random choice function.

In [25]:
# Same probabilities, keyed by the word indices instead of the words
trfmd_probs_idx = {word2idx[word]: prob
                   for word, prob in trfmd_probs.items()}

`random.choices` needs the index and the probabilities

In [26]:
# We unzip the (index, probability) pairs into two parallel tuples:
# the population and the weights passed to random.choices
draw_idx, probs = zip(*trfmd_probs_idx.items())

Given the words in the context, we draw $k$ times as many words.

In [27]:
# Example of a draw: K_NEG words for each of the 2 * w_size context words
random.choices(draw_idx, weights=probs, k=K_NEG * 2 * w_size)

[1863,
 2262,
 2334,
 4661,
 9616,
 1486,
 3213,
 2493,
 3970,
 2313,
 1700,
 7251,
 2313,
 4977,
 184,
 3521,
 3200,
 9241,
 3288,
 1054]

## The pairs

For all the words, we form positive and negative pairs. We extract the context words of a word from its neighbors in the word sequence to form the positive pairs and at random to form the negative ones.

In [28]:
def minibatch_generator(widx_seq):
    """
    Generates one batch for each word that has a complete context window.
    A batch consists of the positive pairs generated by a word and its
    context, followed by the negative pairs:
    w_size * 2 + K_NEG * w_size * 2 = (K_NEG + 1) * w_size * 2 pairs
    :param widx_seq: the corpus as a sequence of word indices
    :return: yields the triples (X_i, X_c, y) of NumPy arrays, where X_i is
    the input word repeated for each pair, X_c contains the context words
    followed by the random words, and y contains the labels: 1 for
    the positive pairs and 0 for the negative ones
    """
    # Local import: the function does not need a new top-level import
    from itertools import accumulate

    nbr_pos = 2 * w_size
    nbr_neg = K_NEG * nbr_pos
    # random.choices() converts the weights into cumulative weights at each
    # call. As they never change, we compute them once and we pass them with
    # cum_weights. The draws are the same as with weights=probs
    cum_probs = list(accumulate(probs))
    # The labels are the same for all the batches
    labels = [1] * nbr_pos + [0] * nbr_neg
    # Iterating on a range tells tqdm the number of batches
    # and avoids copying the sequence
    for idx in tqdm(range(w_size, len(widx_seq) - w_size)):
        # We create the start and end indices as in range(start, end)
        start_idx = idx - w_size
        end_idx = idx + w_size + 1
        # The input word
        X_i = [widx_seq[idx]] * (nbr_pos + nbr_neg)
        # The context words, from the left context: start_idx -> idx
        # and from the right context: idx + 1 -> end_idx
        X_c = [widx_seq[c_idx] for c_idx in
               [*range(start_idx, idx), *range(idx + 1, end_idx)]]
        # The random words to form negative pairs
        X_c += random.choices(draw_idx, cum_weights=cum_probs, k=nbr_neg)
        yield np.array(X_i), np.array(X_c), np.array(labels)

## The Architecture

And now the architecture. We build two inputs: The left input is the input word and the right one is a context word. This corresponds to two kinds of embeddings: input and context.

In [29]:
# Input branch: the index of the input word and its embedding
i_word = Input(shape=(1,))
i_embedding = Embedding(vocab_size,
                        embedding_dim,
                        input_length=1)(i_word)

# Context branch: the index of a context or random word and its embedding.
# This is a second Embedding layer, with its own weights
c_word = Input(shape=(1,))
c_embedding = Embedding(vocab_size,
                        embedding_dim,
                        input_length=1)(c_word)

# Dot product of the two embeddings along the last axis
dot_prod = Dot(axes=-1)([i_embedding, c_embedding])  # normalize=True
# A one-unit dense layer with a sigmoid activation on top of the dot product
output = Dense(1, activation='sigmoid')(dot_prod)
model = Model([i_word, c_word], output)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 1, 100)       972500      input_1[0][0]                    
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 1, 100)       972500      input_2[0][0]                    
______________________________________________________________________________________________

## The Loss
As defined in § 2.2

In [30]:
def loss_neg(y_true, y_hat):
    """
    The negated sum over the batch of:
    y_true * log(y_hat) + (1 - y_true) * log(1 - y_hat)
    :param y_true: the labels: 1 for a positive pair, 0 for a negative one
    :param y_hat: the predictions of the model, outputs of a sigmoid
    :return: a scalar tensor: the loss summed, not averaged, over the batch
    """
    # We flatten both tensors so that labels and predictions of different
    # ranks are multiplied element by element and never broadcast
    y_true = tf.reshape(tf.cast(y_true, tf.float32), [-1])
    y_hat = tf.reshape(y_hat, [-1])
    # A saturated sigmoid outputs exactly 0 or 1. The logarithm is then
    # -inf and 0 * -inf yields NaN: we keep y_hat away from both bounds
    eps = 1e-7
    y_hat = tf.clip_by_value(y_hat, eps, 1.0 - eps)
    log_y_hat_1 = tf.math.log(y_hat)
    log_y_hat_0 = tf.math.log(1.0 - y_hat)
    loss = tf.math.add(tf.math.multiply(y_true, log_y_hat_1),
                       tf.math.multiply(1.0 - y_true, log_y_hat_0))
    loss = -tf.reduce_sum(loss)
    return loss

In [31]:
# We train with the custom loss loss_neg and the RMSprop optimizer
model.compile(loss=loss_neg, optimizer='rmsprop')#, run_eagerly=True)

## Training the Model

The function to measure the vector similarity

In [32]:
def most_sim_vecs(vector, U, nbr_words=10):
    """
    Ranks the rows of a matrix by their cosine distance to a vector
    :param vector: the query vector
    :param U: the matrix whose rows are compared to the vector
    :param nbr_words: the number of row indices to return
    :return: the indices of the closest rows, skipping the first of the
    ranking, which is the query itself when it is a row of U
    """
    distances = []
    for row in U:
        # Cosine distance, not cosine similarity: 0 between equal vectors.
        # A row of zeros has no direction: we give it the maximal distance, 2
        if np.any(row):
            distances.append(cosine(vector, row))
        else:
            distances.append(2)
    ranking = sorted(range(len(distances)), key=distances.__getitem__)
    return ranking[1:nbr_words + 1]

In [33]:
# Number of passes over the whole word sequence
EPOCHS = 2

In [34]:
for epoch in range(EPOCHS):
    print("Epoch {}/{}".format(epoch + 1, EPOCHS))
    # One update per batch. After the loop, loss is the loss of the last batch
    for X_i, X_c, y in minibatch_generator(widx_seq):
        loss = model.train_on_batch([X_i, X_c], y)
    print('Loss:', loss)
    # The first weight array of the model is used as the embedding matrix
    embeddings = model.get_weights()[0]
    # For each test word, the words with the closest embeddings
    most_sim_words = {}
    for w in test_words:
        closest_idx = most_sim_vecs(embeddings[word2idx[w]], embeddings)
        most_sim_words[w] = [idx2word.get(i) for i in closest_idx]
        print(w, most_sim_words[w])

0it [00:00, ?it/s]

Epoch 1/2


272708it [35:38, 127.51it/s]


Loss: 9.868846893310547
he ['she', 'we', 'they', 'recognise', 'minerva', 'plied', 'sports', 'it', 'pleases', 'i']
she ['minerva', 'he', 'juno', 'penelope', 'seemly', 'ulysses', 'they', 'euryclea', 'piloted', 'hector']
ulysses ['telemachus', 'eumaeus', 'antinous', 'penelope', 'mercury', 'minerva', 'euryclea', 'began', 'him', 'king']
penelope ['telemachus', 'dear', 'neptune', 'father', 'hector', 'ulysses', 'menelaus', 'eumaeus', 'apollo', 'juno']
achaeans ['gods', 'suitors', 'trojans', 'danaans', 'others', 'immortals', 'phaeacians', 'argives', 'myrmidons', 'we']


14it [00:00, 132.20it/s]

trojans ['achaeans', 'suitors', 'danaans', 'others', 'argives', 'other', 'plain', 'sea', 'themselves', 'earth']
Epoch 2/2


272708it [34:26, 132.00it/s]


Loss: 9.982368469238281
he ['she', 'they', 'overtook', 'ulysses', 'lowing', 'minerva', 'heinous', 'flits', 'dispersed', 'breaking']
she ['he', 'they', 'minerva', 'steps', 'penelope', 'seemly', 'mercury', 'juno', 'piloted', 'athlete']
ulysses ['telemachus', 'eumaeus', 'penelope', 'antinous', 'mercury', 'euryclea', 'menelaus', 'minerva', 'straightening', 'idomeneus']
penelope ['telemachus', 'eumaeus', 'nurse', 'euryclea', 'neptune', 'antinous', 'dear', 'menelaus', 'juno', 'father']
achaeans ['trojans', 'suitors', 'danaans', 'others', 'gods', 'argives', 'sun', 'fight', 'phaeacians', 'sea']
trojans ['achaeans', 'suitors', 'danaans', 'argives', 'others', 'sea', 'themselves', 'gods', 'earth', 'fight']


### Homer

In [36]:
"""
neg_loss and 2 epochs POW = 0.75. No downsampling (2021/04/08)
Loss: 10.052577018737793
he ['they', 'she', 'we', 'it', 'seemed', 'pool', 'had', 'minerva', 'wave', 'foursquare']
she ['minerva', 'venus', 'penelope', 'juno', 'he', 'telemachus', 'they', 'loathed', 'helen', 'we']
ulysses ['telemachus', 'penelope', 'antinous', 'eumaeus', 'euryclea', 'menelaus', 'eurymachus', 'idomeneus', 'apollo', 'noemon']
penelope ['antinous', 'telemachus', 'eumaeus', 'dear', 'euryclea', 'eurymachus', 'juno', 'nurse', 'menelaus', 'minerva']
achaeans ['trojans', 'danaans', 'gods', 'others', 'argives', 'suitors', 'ships', 'sea', 'men', 'phaeacians']
trojans ['achaeans', 'argives', 'danaans', 'gods', 'others', 'other', 'plain', 'sea', 'ships', 'fighting']
"""

"\nneg_loss and 2 epochs POW = 0.75. No downsampling (2021/04/08)\nLoss: 10.052577018737793\nhe ['they', 'she', 'we', 'it', 'seemed', 'pool', 'had', 'minerva', 'wave', 'foursquare']\nshe ['minerva', 'venus', 'penelope', 'juno', 'he', 'telemachus', 'they', 'loathed', 'helen', 'we']\nulysses ['telemachus', 'penelope', 'antinous', 'eumaeus', 'euryclea', 'menelaus', 'eurymachus', 'idomeneus', 'apollo', 'noemon']\npenelope ['antinous', 'telemachus', 'eumaeus', 'dear', 'euryclea', 'eurymachus', 'juno', 'nurse', 'menelaus', 'minerva']\nachaeans ['trojans', 'danaans', 'gods', 'others', 'argives', 'suitors', 'ships', 'sea', 'men', 'phaeacians']\ntrojans ['achaeans', 'argives', 'danaans', 'gods', 'others', 'other', 'plain', 'sea', 'ships', 'fighting']\n"

In [None]:
"""Loss: 9.982368469238281
neg_loss and 2 epochs POW = 0.75. No downsampling (2021/04/09)
he ['she', 'they', 'overtook', 'ulysses', 'lowing', 'minerva', 'heinous', 'flits', 'dispersed', 'breaking']
she ['he', 'they', 'minerva', 'steps', 'penelope', 'seemly', 'mercury', 'juno', 'piloted', 'athlete']
ulysses ['telemachus', 'eumaeus', 'penelope', 'antinous', 'mercury', 'euryclea', 'menelaus', 'minerva', 'straightening', 'idomeneus']
penelope ['telemachus', 'eumaeus', 'nurse', 'euryclea', 'neptune', 'antinous', 'dear', 'menelaus', 'juno', 'father']
achaeans ['trojans', 'suitors', 'danaans', 'others', 'gods', 'argives', 'sun', 'fight', 'phaeacians', 'sea']
trojans ['achaeans', 'suitors', 'danaans', 'argives', 'others', 'sea', 'themselves', 'gods', 'earth', 'fight']
"""

In [37]:
"""
neg_loss and 2 epochs POW = 0.75. No downsampling (2021/04/08), dot normalized
Loss: 9.279374122619629
he ['she', 'they', 'i', 'minerva', 'achilles', 'glad', 'behind', 'iris', 'scylla', 'rude']
she ['he', 'minerva', 'they', 'iris', 'apollo', 'telemachus', 'eueneus', 'juno', 'euryclea', 'scylla']
ulysses ['telemachus', 'alcinous', 'antinous', 'king', 'agamemnon', 'ajax', 'achilles', 'minerva', 'apollo', 'eumaeus']
penelope ['dear', 'father', 'nurse', 'said', 'antinous', 'agamemnon', 'telemachus', 'eumaeus', 'sir', 'wife']
achaeans ['danaans', 'trojans', 'argives', 'others', 'suitors', 'gods', 'immortals', 'phaeacians', 'blessed', 'other']
trojans ['achaeans', 'danaans', 'argives', 'phaeacians', 'sun', 'immortals', 'sea', 'gods', 'suitors', 'plain']"""

"\nneg_loss and 2 epochs POW = 0.75. No downsampling (2021/04/08), dot normalized\nLoss: 9.279374122619629\nhe ['she', 'they', 'i', 'minerva', 'achilles', 'glad', 'behind', 'iris', 'scylla', 'rude']\nshe ['he', 'minerva', 'they', 'iris', 'apollo', 'telemachus', 'eueneus', 'juno', 'euryclea', 'scylla']\nulysses ['telemachus', 'alcinous', 'antinous', 'king', 'agamemnon', 'ajax', 'achilles', 'minerva', 'apollo', 'eumaeus']\npenelope ['dear', 'father', 'nurse', 'said', 'antinous', 'agamemnon', 'telemachus', 'eumaeus', 'sir', 'wife']\nachaeans ['danaans', 'trojans', 'argives', 'others', 'suitors', 'gods', 'immortals', 'phaeacians', 'blessed', 'other']\ntrojans ['achaeans', 'danaans', 'argives', 'phaeacians', 'sun', 'immortals', 'sea', 'gods', 'suitors', 'plain']"

In [38]:
"""
neg_loss and 2 epochs POW = 1. No downsampling (2021/04/08)
Loss: 10.320120811462402
he ['she', 'they', 'grip', 'wielder', 'pursuing', 'augur', 'i', 'enchantress', 'we', 'it']
she ['minerva', 'he', 'juno', 'penelope', 'they', 'telemachus', 'theoclymenus', 'i', 'eumaeus', 'agreeable']
ulysses ['telemachus', 'achilles', 'nestor', 'penelope', 'agamemnon', 'antinous', 'apollo', 'menelaus', 'neptune', 'eurymachus']
penelope ['telemachus', 'eumaeus', 'apollo', 'nurse', 'juno', 'neptune', 'antinous', 'ulysses', 'minerva', 'circe']
achaeans ['trojans', 'danaans', 'gods', 'others', 'argives', 'we', 'suitors', 'phaeacians', 'reserve', 'sun']
trojans ['achaeans', 'argives', 'danaans', 'suitors', 'myrmidons', 'gods', 'troy', 'phaeacians', 'sea', 'sighed']
"""

"\nneg_loss and 2 epochs POW = 1. No downsampling (2021/04/08)\nLoss: 10.320120811462402\nhe ['she', 'they', 'grip', 'wielder', 'pursuing', 'augur', 'i', 'enchantress', 'we', 'it']\nshe ['minerva', 'he', 'juno', 'penelope', 'they', 'telemachus', 'theoclymenus', 'i', 'eumaeus', 'agreeable']\nulysses ['telemachus', 'achilles', 'nestor', 'penelope', 'agamemnon', 'antinous', 'apollo', 'menelaus', 'neptune', 'eurymachus']\npenelope ['telemachus', 'eumaeus', 'apollo', 'nurse', 'juno', 'neptune', 'antinous', 'ulysses', 'minerva', 'circe']\nachaeans ['trojans', 'danaans', 'gods', 'others', 'argives', 'we', 'suitors', 'phaeacians', 'reserve', 'sun']\ntrojans ['achaeans', 'argives', 'danaans', 'suitors', 'myrmidons', 'gods', 'troy', 'phaeacians', 'sea', 'sighed']\n"

In [39]:
"""
neg_loss and 4 epochs POW = 0.75?
he ['she', 'they', 'achilles', 'i', 'bride', 'halt', 'ruined', 'mars', 'solitary', 'muttering']
she ['minerva', 'he', 'they', 'handmaids', 'helen', 'heaven', 'maids', 'venus', 'wand', 'steps']
ulysses ['telemachus', 'eumaeus', 'aegisthus', 'alcinous', 'antinous', 'mercury', 'penelope', 'menelaus', 'nestor', 'eurymachus']
penelope ['antinous', 'eumaeus', 'telemachus', 'nurse', 'dear', 'alcinous', 'piteously', 'euryclea', 'mercury', 'leto']
achaeans ['danaans', 'trojans', 'argives', 'gods', 'others', 'suitors', 'phaeacians', 'sea', 'alone', 'closely']
trojans ['achaeans', 'argives', 'danaans', 'gods', 'others', 'sea', 'earth', 'hindered', 'themselves', 'suitors']

Loss: 9.882923126220703 POW = 1
he ['she', 'they', 'key', 'i', 'we', 'destroys', 'achilles', 'glares', 'dearest', 'mercury']
she ['he', 'minerva', 'they', 'penelope', 'melanthius', 'apollo', 'mercury', 'i', 'telemachus', 'alcyone']
ulysses ['telemachus', 'antinous', 'achilles', 'penelope', 'agamemnon', 'eumaeus', 'minerva', 'piteously', 'euryclea', 'ajax']
penelope ['telemachus', 'nurse', 'antinous', 'eumaeus', 'euryclea', 'juno', 'dear', 'apollo', 'thetis', 'menelaus']
achaeans ['trojans', 'danaans', 'argives', 'others', 'suitors', 'gods', 'phaeacians', 'myrmidons', 'immortals', 'lycians']
trojans ['achaeans', 'argives', 'danaans', 'others', 'suitors', 'gods', 'lycians', 'myrmidons', 'themselves', 'driven']

Loss: 9.43042278289795 POW = 1
he ['they', 'she', 'healed', 'dagger', 'i', 'prisoner', 'we', 'apollo', 'horsehair', 'craftiest']
she ['minerva', 'he', 'juno', 'they', 'venus', 'courtesy', 'maids', 'veil', 'penelope', 'iris']
ulysses ['telemachus', 'agamemnon', 'achilles', 'eumaeus', 'neptune', 'nestor', 'menelaus', 'alcinous', 'idomeneus', 'penelope']
penelope ['eumaeus', 'nurse', 'antinous', 'telemachus', 'euryclea', 'juno', 'neptune', 'alcinous', 'eurymachus', 'mother']
achaeans ['trojans', 'argives', 'danaans', 'suitors', 'gods', 'others', 'phaeacians', 'themselves', 'fall', 'nothing']
trojans ['achaeans', 'argives', 'danaans', 'dogs', 'themselves', 'others', 'suitors', 'fall', 'sooner', 'gods']

Loss: 11.508221626281738 POW = 0.75
he ['they', 'she', 'offal', 'gorgon', 'place', 'undisturbed', 'prizes', 'hind', 'seat', 'steaks']
she ['minerva', 'juno', 'vixen', 'arete', 'heron', 'he', 'they', 'beguiled', 'venus', 'moody']
ulysses ['mercury', 'telemachus', 'penelope', 'antinous', 'leto', 'alcinous', 'swineherd', 'idomeneus', 'eumaeus', 'diomed']
penelope ['euryclea', 'eumaeus', 'antinous', 'nurse', 'leto', 'alcinous', 'mentor', 'telemachus', 'dear', 'juno']
achaeans ['argives', 'trojans', 'danaans', 'gods', 'suitors', 'others', 'ships', 'fighting', 'day', 'battle']
trojans ['achaeans', 'argives', 'danaans', 'ships', 'themselves', 'lycians', 'ground', 'sea', 'heroes', 'dogs']
Loss: 9.757258415222168 POW = 0.75
he ['they', 'she', 'it', 'dipping', 'targets', 'journey', 'devil', 'sunium', 'safely', 'democoon']
she ['they', 'he', 'minerva', 'simple', 'juno', 'maids', 'slipped', 'calypso', 'husband', 'overboard']
ulysses ['penelope', 'telemachus', 'antinous', 'menelaus', 'glad', 'alcinous', 'eumaeus', 'euryclea', 'eurymachus', 'nestor']
penelope ['telemachus', 'antinous', 'eumaeus', 'alcinous', 'nurse', 'euryclea', 'ulysses', 'thetis', 'mercury', 'dear']
achaeans ['danaans', 'trojans', 'argives', 'suitors', 'gods', 'others', 'immortals', 'sea', 'ships', 'myrmidons']
trojans ['achaeans', 'danaans', 'argives', 'ships', 'suitors', 'earth', 'fighting', 'sea', 'themselves', 'immortals']

Loss: 7.38845682144165 POW = 0.75 dot norm
he ['she', 'they', 'minerva', 'scoundrel', 'i', 'achilles', 'we', 'apollo', 'ineffable', 'exposed']
she ['he', 'minerva', 'juno', 'sun', 'they', 'phemius', 'venus', 'euryalus', 'penelope', 'apollo']
ulysses ['achilles', 'telemachus', 'antinous', 'diomed', 'swineherd', 'antilochus', 'penelope', 'agamemnon', 'hector', 'menelaus']
penelope ['euryclea', 'telemachus', 'dear', 'nurse', 'alcinous', 'answered', 'said', 'queen', 'o', 'eumaeus']
achaeans ['trojans', 'danaans', 'argives', 'gods', 'suitors', 'others', 'immortals', 'sun', 'phaeacians', 'lycians']
trojans ['achaeans', 'danaans', 'argives', 'others', 'lycians', 'dogs', 'suitors', 'immortals', 'gods', 'main']

Loss: 9.18224811553955 POW = 0.75 dot norm
he ['she', 'they', 'i', 'aim', 'penelope', 'glad', 'antinous', 'minerva', 'sun', 'adrestus']
she ['he', 'minerva', 'juno', 'sarpedon', 'sun', 'they', 'venus', 'antinous', 'obeyed', 'maids']
ulysses ['antinous', 'swineherd', 'ajax', 'menelaus', 'diomed', 'eurymachus', 'agamemnon', 'alcinous', 'eumaeus', 'achilles']
penelope ['dear', 'father', 'eumaeus', 'telemachus', 'euryclea', 'nurse', 'piteously', 'mother', 'neighbour', 'antinous']
achaeans ['danaans', 'trojans', 'gods', 'argives', 'others', 'phaeacians', 'immortals', 'suitors', 'we', 'birds']
trojans ['achaeans', 'argives', 'danaans', 'suitors', 'phaeacians', 'themselves', 'lycians', 'immortals', 'myrmidons', 'gods']
"""

"\nneg_loss and 4 epochs POW = 0.75?\nhe ['she', 'they', 'achilles', 'i', 'bride', 'halt', 'ruined', 'mars', 'solitary', 'muttering']\nshe ['minerva', 'he', 'they', 'handmaids', 'helen', 'heaven', 'maids', 'venus', 'wand', 'steps']\nulysses ['telemachus', 'eumaeus', 'aegisthus', 'alcinous', 'antinous', 'mercury', 'penelope', 'menelaus', 'nestor', 'eurymachus']\npenelope ['antinous', 'eumaeus', 'telemachus', 'nurse', 'dear', 'alcinous', 'piteously', 'euryclea', 'mercury', 'leto']\nachaeans ['danaans', 'trojans', 'argives', 'gods', 'others', 'suitors', 'phaeacians', 'sea', 'alone', 'closely']\ntrojans ['achaeans', 'argives', 'danaans', 'gods', 'others', 'sea', 'earth', 'hindered', 'themselves', 'suitors']\n\nLoss: 9.882923126220703 POW = 1\nhe ['she', 'they', 'key', 'i', 'we', 'destroys', 'achilles', 'glares', 'dearest', 'mercury']\nshe ['he', 'minerva', 'they', 'penelope', 'melanthius', 'apollo', 'mercury', 'i', 'telemachus', 'alcyone']\nulysses ['telemachus', 'antinous', 'achilles', 'p

In [None]:
"""
neg_loss and 4 epochs, downsampling
Loss: 9.66569995880127
he ['cunningly', 'nigh', 'quick', 'trachis', 'despairingly', 'lesson', 'withdrew', 'she', 'keenly', 'bewail']
she ['attempt', 'overtakes', 'aloud', 'piloted', 'gush', 'hawk', 'he', 'nausicaa', 'meliboea', 'thrash']
ulysses ['swineherd', 'tried', 'indisputable', 'happen', 'invincible', 'arrive', 'accompanied', 'stains', 'cyclopes', 'rouse']
penelope ['nurse', 'desire', 'telemachus', 'madam', 'stockman', 'angrily', 'reminds', 'eurynome', 'troubled', 'alas']
achaeans ['danaans', 'argives', 'oracles', 'faint', 'pick', 'avail', 'knowledge', 'useful', 'people', 'unharnessed']
trojans ['wander', 'perilous', 'erembians', 'maniac', 'evils', 'undaunted', 'argives', 'thwart', 'violate', 'warding']
"""

In [40]:
"""
Loss: 0.3799111843109131
cross entropy, 4 epochs
he ['she', 'permitted', 'they', 'maris', 'accustomed', 'swooping', 'curiously', 'abydos', 'kneading', 'eilesium']
she ['minerva', 'they', 'juno', 'he', 'her', 'venus', 'empty', 'iris', 'husband', 'permitted']
ulysses ['penelope', 'telemachus', 'alcinous', 'eurymachus', 'eumaeus', 'antinous', 'mercury', 'leto', 'nausicaa', 'euryclea']
penelope ['eumaeus', 'euryclea', 'telemachus', 'antinous', 'dear', 'nausicaa', 'alcinous', 'nurse', 'eurymachus', 'ulysses']
achaeans ['trojans', 'danaans', 'others', 'argives', 'gods', 'sea', 'ships', 'suitors', 'forever', 'yourselves']
trojans ['achaeans', 'argives', 'danaans', 'ground', 'others', 'gods', 'sea', 'suitors', 'ships', 'earth']

Loss: 0.42960622906684875
he ['she', 'they', 'cooling', 'we', 'wain', 'i', 'pretty', 'morn', 'reeds', 'scream']
she ['he', 'minerva', 'they', 'achilles', 'calypso', 'iris', 'euryclea', 'thersites', 'mercury', 'maids']
ulysses ['telemachus', 'antinous', 'agamemnon', 'achilles', 'alcinous', 'penelope', 'menelaus', 'nestor', 'euryclea', 'eumaeus']
penelope ['telemachus', 'eumaeus', 'nurse', 'euryclea', 'dear', 'antinous', 'juno', 'alcinous', 'menelaus', 'sir']
achaeans ['argives', 'danaans', 'trojans', 'others', 'suitors', 'sun', 'gods', 'immortals', 'phaeacians', 'lycians']
trojans ['achaeans', 'argives', 'danaans', 'plain', 'themselves', 'break', 'grow', 'suitors', 'ships', 'mountains']

"""

"\nLoss: 0.3799111843109131\ncross entropy, 4 epochs\nhe ['she', 'permitted', 'they', 'maris', 'accustomed', 'swooping', 'curiously', 'abydos', 'kneading', 'eilesium']\nshe ['minerva', 'they', 'juno', 'he', 'her', 'venus', 'empty', 'iris', 'husband', 'permitted']\nulysses ['penelope', 'telemachus', 'alcinous', 'eurymachus', 'eumaeus', 'antinous', 'mercury', 'leto', 'nausicaa', 'euryclea']\npenelope ['eumaeus', 'euryclea', 'telemachus', 'antinous', 'dear', 'nausicaa', 'alcinous', 'nurse', 'eurymachus', 'ulysses']\nachaeans ['trojans', 'danaans', 'others', 'argives', 'gods', 'sea', 'ships', 'suitors', 'forever', 'yourselves']\ntrojans ['achaeans', 'argives', 'danaans', 'ground', 'others', 'gods', 'sea', 'suitors', 'ships', 'earth']\n\nLoss: 0.42960622906684875\nhe ['she', 'they', 'cooling', 'we', 'wain', 'i', 'pretty', 'morn', 'reeds', 'scream']\nshe ['he', 'minerva', 'they', 'achilles', 'calypso', 'iris', 'euryclea', 'thersites', 'mercury', 'maids']\nulysses ['telemachus', 'antinous', '

### Dickens

In [41]:
"""
neg_loss and 3 epochs
he ['she', 'corrections', 'quainter', 'tortuously', 'they', 'disgusts', 'legislature', 'decorously', 'applicants', 'topple']
she ['he', 'florence', 'they', 'wastin', 'underhanded', 'everybody', 'powerfully', 'estella', 'bella', 'nobody']
paris ['france', 'england', 'requisition', 'despair', 'italy', 'amazement', 'parliament', 'dust', 'convulsions', 'lincolnshire']
london ['england', 'town', 'italy', 'france', 'vain', 'paris', 'lincolnshire', 'buckingham', 'itself', 'due']
table ['window', 'fire', 'ground', 'wall', 'floor', 'sofa', 'box', 'road', 'carriage', 'desk']
rare ['female', 'singular', 'monstrous', 'special', 'mere', 'moral', 'terrible', 'genteel', 'common', 'remarkable']
monday ['wednesday', 'horseback', 'floor', 'thursday', 'board', 'tiptoe', 'sides', 'entering', 'saturday', 'sunday']
sunday ['saturday', 'summer', 'post', 'coffee', 'board', 'stone', 'monday', 'wall', 'green', 'garden']
man ['gentleman', 'woman', 'lady', 'person', 'boy', 'child', 'girl', 'dog', 'fellow', 'creature']
woman ['lady', 'girl', 'gentleman', 'creature', 'man', 'fellow', 'person', 'boy', 'child', 'servant']
king ['clergyman', 'maker', 'baker', 'cook', 'pawnbroker', 'chandler', 'attorney', 'bride', 'knights', 'mayor']
aqueen ['forest', 'tumult', 'pile', 'roast', 'professor', 'soot', 'combination', 'mixture', 'bunch', 'depression']
boy ['girl', 'child', 'lady', 'servant', 'woman', 'gentleman', 'doctor', 'baby', 'house', 'fellow']"""

"\nneg_loss and 3 epochs\nhe ['she', 'corrections', 'quainter', 'tortuously', 'they', 'disgusts', 'legislature', 'decorously', 'applicants', 'topple']\nshe ['he', 'florence', 'they', 'wastin', 'underhanded', 'everybody', 'powerfully', 'estella', 'bella', 'nobody']\nparis ['france', 'england', 'requisition', 'despair', 'italy', 'amazement', 'parliament', 'dust', 'convulsions', 'lincolnshire']\nlondon ['england', 'town', 'italy', 'france', 'vain', 'paris', 'lincolnshire', 'buckingham', 'itself', 'due']\ntable ['window', 'fire', 'ground', 'wall', 'floor', 'sofa', 'box', 'road', 'carriage', 'desk']\nrare ['female', 'singular', 'monstrous', 'special', 'mere', 'moral', 'terrible', 'genteel', 'common', 'remarkable']\nmonday ['wednesday', 'horseback', 'floor', 'thursday', 'board', 'tiptoe', 'sides', 'entering', 'saturday', 'sunday']\nsunday ['saturday', 'summer', 'post', 'coffee', 'board', 'stone', 'monday', 'wall', 'green', 'garden']\nman ['gentleman', 'woman', 'lady', 'person', 'boy', 'child

In [42]:
""" 
Loss: 10.337905883789062 POW = 0.75 dot norm 2 epochs
he ['she', 'they', 'i', 'we', 'nicholas', 'everybody', 'nobody', 'it', 'newman', 'agglomeration']
she ['he', 'they', 'i', 'barbara', 'we', 'nicholas', 'everybody', 'fanny', 'husband', 'kate']
paris ['london', 'gallery', 'centre', 'stationed', 'park', 'capitol', 'village', 'kitchen', 'execution', 'refreshments']
london ['kitchen', 'street', 'town', 'city', 'england', 'front', 'market', 'church', 'hall', 'west']
table ['sofa', 'floor', 'desk', 'window', 'fire', 'wall', 'staircase', 'ground', 'fender', 'road']
rare ['remarkable', 'serious', 'taste', 'genteel', 'romantic', 'ingenious', 'gentlemanly', 'considerable', 'deal', 'alarming']
monday ['saturday', 'sunday', 'dinner', 'next', 'cathedral', 'morrow', 'floor', 'gazing', 'nine', 'previous']
sunday ['saturday', 'summer', 'day', 'cathedral', 'monday', 'week', 'neighbouring', 'stage', 'stone', 'clock']
man ['gentleman', 'woman', 'lady', 'person', 'girl', 'fellow', 'boy', 'young', 'devil', 'creature']
woman ['man', 'lady', 'gentleman', 'girl', 'creature', 'fellow', 'young', 'boy', 'person', 'widow']
king ['st', 'doctor', 'ghost', 'queen', 'schoolmaster', 'sol', 'bench', 'saracen', 'pastry', 'master']
queen ['king', 'clergyman', 'landlady', 'bunch', 'mixture', 'labyrinth', 'mace', 'masterly', 'species', 'perpetual']
boy ['child', 'girl', 'fellow', 'lad', 'woman', 'gentleman', 'man', 'creature', 'lady', 'prisoner']
girl ['boy', 'fellow', 'child', 'woman', 'creature', 'lady', 'man', 'gentleman', 'lad', 'poor']

Epoch 2/2

3355452it [18:54:19, 49.30it/s]

Loss: 10.741490364074707 POW = 0.75 dot no norm 2 epochs
he ['she', 'they', 'unwarrantably', 'gilliflower', 'administer', 'gaieties', 'graymarsh', 'phil', 'ceases', 'aloof']
she ['he', 'they', 'florence', 'everybody', 'thieve', 'edith', 'circumlocutions', 'we', 'honored', 'nobody']
paris ['france', 'despair', 'requisition', 'fleet', 'england', 'dart', 'custody', 'parliament', 'succession', 'bevis']
london ['england', 'town', 'our', 'itself', 'vain', 'paris', 'france', 'lincolnshire', 'italy', 'due']
table ['ground', 'window', 'floor', 'fire', 'wall', 'road', 'board', 'sofa', 'staircase', 'box']
rare ['mighty', 'moral', 'extensive', 'singular', 'brief', 'special', 'lively', 'female', 'mere', 'trifling']
monday ['floor', 'sides', 'sunday', 'wall', 'road', 'horseback', 'saturday', 'board', 'ground', 'entering']
sunday ['saturday', 'wall', 'kitchen', 'stone', 'sea', 'garden', 'hall', 'post', 'summer', 'passage']
man ['gentleman', 'woman', 'lady', 'person', 'boy', 'girl', 'creature', 'fellow', 'dog', 'servant']
woman ['lady', 'girl', 'gentleman', 'creature', 'man', 'servant', 'fellow', 'boy', 'soldier', 'person']
king ['baker', 'clergyman', 'mayor', 'bride', 'narrative', 'ghost', 'bill', 'tailor', 'cook', 'sexton']
queen ['mixture', 'field', 'size', 'bowl', 'forest', 'flower', 'beef', 'glare', 'brightest', 'receipt']
boy ['girl', 'child', 'woman', 'lady', 'servant', 'fellow', 'baby', 'doctor', 'lad', 'gentleman']
girl ['boy', 'woman', 'child', 'lady', 'creature', 'servant', 'fellow', 'baby', 'poor', 'gentleman']
"""

" \nLoss: 10.337905883789062 POW = 0.75 dot norm 2 epochs\nhe ['she', 'they', 'i', 'we', 'nicholas', 'everybody', 'nobody', 'it', 'newman', 'agglomeration']\nshe ['he', 'they', 'i', 'barbara', 'we', 'nicholas', 'everybody', 'fanny', 'husband', 'kate']\nparis ['london', 'gallery', 'centre', 'stationed', 'park', 'capitol', 'village', 'kitchen', 'execution', 'refreshments']\nlondon ['kitchen', 'street', 'town', 'city', 'england', 'front', 'market', 'church', 'hall', 'west']\ntable ['sofa', 'floor', 'desk', 'window', 'fire', 'wall', 'staircase', 'ground', 'fender', 'road']\nrare ['remarkable', 'serious', 'taste', 'genteel', 'romantic', 'ingenious', 'gentlemanly', 'considerable', 'deal', 'alarming']\nmonday ['saturday', 'sunday', 'dinner', 'next', 'cathedral', 'morrow', 'floor', 'gazing', 'nine', 'previous']\nsunday ['saturday', 'summer', 'day', 'cathedral', 'monday', 'week', 'neighbouring', 'stage', 'stone', 'clock']\nman ['gentleman', 'woman', 'lady', 'person', 'girl', 'fellow', 'boy', 'y