# A simple implementation of CBOW

Adapted from _Efficient estimation of word representations in vector space_ by Mikolov et al., 2013.

Author: Pierre Nugues

The imports

In [1]:
from tensorflow.keras import backend
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Lambda, Average, GlobalAveragePooling1D
import regex as re
import os
from tensorflow.keras.utils import to_categorical
import numpy as np
from scipy.spatial.distance import cosine
from tqdm import tqdm
from random import shuffle

## Parameters

The embedding size and context size

In [2]:
embedding_dim = 100
w_size = 2
c_size = w_size * 2 + 1

## The Corpus

We can choose between dataset and execute locally or on colab

In [3]:
dataset = 'dickens'  # 'homer' dickens' 'selma' 'big'
colab = False # On my machine or on colab
debug = False

In [4]:
if colab:
    BASE_PATH = '/content/drive/My Drive/Colab Notebooks/'
else:
    BASE_PATH = '../../../'

In [5]:
if colab:
    from google.colab import drive
    drive.mount('/content/drive')

We read the files from a folder

In [6]:
def get_files(dir, suffix):
    """
    Returns all the files in a folder ending with suffix
    :param dir:
    :param suffix:
    :return: the list of file names
    """
    files = []
    for file in os.listdir(dir):
        if file.endswith(suffix):
            files.append(file)
    return files


def load_corpus(path):
    files = get_files(path, 'txt')
    files = [path + file for file in files]
    print(files)
    text = ''
    for file in files:
        text += open(file).read()
    return text

In [7]:
if dataset == 'homer':
    #text = 'Sing, O goddess, the anger of Achilles son of Peleus'.lower()
    text1 = open(BASE_PATH + 'corpus/iliad.mb.txt', encoding='utf-8').read().lower()
    text2 = open(BASE_PATH + 'corpus/odyssey.mb.txt', encoding='utf-8').read().lower()
    text = text1 + text2
    test_words = ['he', 'she', 'ulysses', 'penelope', 'achaeans', 'trojans']
if dataset == 'dickens':
    path = BASE_PATH + 'corpus/Dickens/'
    text = load_corpus(path)
    test_words = ['he', 'she', 'paris', 'london', 'table', 'rare', 'monday', 'sunday', 'man', 'woman', 'king', 'queen', 'boy',
                  'girl']
elif dataset == 'selma':
    path = BASE_PATH + 'corpus/Selma/'
    text = load_corpus(path)
    test_words = ['han', 'hon', 'att', 'bord', 'bordet', 'måndag', 'söndag', 'man', 'kvinna', 'kung', 'drottning',
                  'pojke', 'flicka']
elif dataset == 'big':
    path = BASE_PATH + 'corpus/Dickens/'
    text = load_corpus(path)
    path = BASE_PATH + 'corpus/Norvig/'
    text += load_corpus(path)
    test_words = ['he', 'she', 'paris', 'london', 'table', 'rare', 'monday', 'sunday', 'man', 'woman', 'king', 'queen', 'boy',
                  'girl']    

['../../../corpus/Dickens/Hard Times.txt', '../../../corpus/Dickens/Oliver Twist.txt', '../../../corpus/Dickens/Great Expectations.txt', '../../../corpus/Dickens/The Old Curiosity Shop.txt', '../../../corpus/Dickens/A Tale of Two Cities.txt', '../../../corpus/Dickens/Dombey and Son.txt', '../../../corpus/Dickens/The Pickwick Papers.txt', '../../../corpus/Dickens/Bleak House.txt', '../../../corpus/Dickens/Our Mutual Friend.txt', '../../../corpus/Dickens/The Mystery of Edwin Drood.txt', '../../../corpus/Dickens/Nicholas Nickleby.txt', '../../../corpus/Dickens/David Copperfield.txt', '../../../corpus/Dickens/Little Dorrit.txt', '../../../corpus/Dickens/A Christmas Carol in Prose.txt']


## Processing the Corpus

We set all the text in lowercase

In [8]:
text = text.lower()
words = re.findall('\p{L}+', text)
words[:5]

['hard', 'times', 'and', 'reprinted', 'pieces']

We extract the unique words

In [9]:
unique_words = sorted(list(set(words)))
unique_words[:10]

['a',
 'aaron',
 'aback',
 'abaft',
 'abandon',
 'abandoned',
 'abandoning',
 'abandonment',
 'abandons',
 'abase']

In [10]:
vocab_size = len(unique_words)
vocab_size

35221

And we create indices

In [11]:
word2idx = {word: i for (i, word) in enumerate(unique_words)}
idx2word = {v: k for k, v in word2idx.items()}
#word2idx

We map the words to their indices

In [12]:
word_idxs = list(map(word2idx.get, words))
word_idxs[:5]

[14222, 31182, 1071, 25401, 22543]

## The Architecture

And now the architecture

In [13]:
model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=2 * w_size),
    #GlobalAveragePooling1D(),
    #Lambda(lambda x: backend.sum(x, axis=1)),
    Lambda(lambda x: backend.mean(x, axis=1)),
    Dense(vocab_size, activation='softmax')
])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 4, 100)            3522100   
_________________________________________________________________
lambda (Lambda)              (None, 100)               0         
_________________________________________________________________
dense (Dense)                (None, 35221)             3557321   
Total params: 7,079,421
Trainable params: 7,079,421
Non-trainable params: 0
_________________________________________________________________


In [14]:
model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop')

## Training the Model

We have two batch generators: one with indices as they come in the list, one with shuffled indices. Not sure which one is the best.

In [15]:
BATCH_SIZE = 1024

In [16]:
def minibatch_generator_shuffled(words):
    max_start_inx = len(words) - c_size
    batch_cnt = int(np.ceil(max_start_inx / BATCH_SIZE))
    shuffled_start_inx = np.array(range(max_start_inx))
    shuffle(shuffled_start_inx)
    for i in range(batch_cnt):    
        batch_indices = shuffled_start_inx[range(i * BATCH_SIZE, 
                                                 min((i + 1) * BATCH_SIZE, max_start_inx))]
        context_batch = []
        label_batch = []
        for start_inx in batch_indices:
            mid_inx = start_inx + w_size
            end_inx = start_inx + c_size
            context = words[start_inx:mid_inx] + words[mid_inx + 1:end_inx]
            label = words[mid_inx]
            context_batch.append(context)
            label_batch.append(words[mid_inx])
        X = np.array(context_batch)
        y = np.array(label_batch)
        yield X, y

In [17]:
def minibatch_generator(words):
    max_start_inx = len(words) - c_size
    batch_cnt = int(np.ceil(max_start_inx / BATCH_SIZE))
    for i in range(batch_cnt):
        batch_indices = range(i * BATCH_SIZE, 
                              min((i + 1) * BATCH_SIZE, max_start_inx))
        context_batch = []
        label_batch = []
        for start_inx in batch_indices:
            mid_inx = start_inx + w_size
            end_inx = start_inx + c_size
            context = words[start_inx:mid_inx] + words[mid_inx + 1:end_inx]
            label = words[mid_inx]
            context_batch.append(context)
            label_batch.append(words[mid_inx])
        X = np.array(context_batch)
        y = np.array(label_batch)
        yield X, y

The function to measure the vector similarity

In [18]:
def most_sim_vecs(vector, U, nbr_words=10):
    # Here cosine distance and not cosine
    # distance between equal vectors: 0. max distance: 2
    dist = [cosine(vector, U[i, :]) if np.any(U[i, :]) else 2
            for i in range(U.shape[0])]
    sorted_vectors = sorted(range(len(dist)), key=lambda k: dist[k])
    return sorted_vectors[1:nbr_words + 1]

In [19]:
EPOCHS = 10

And the training loop

In [20]:
for epoch in range(EPOCHS):
    print("Epoch {}/{}".format(epoch + 1, EPOCHS))
    for i, batch in tqdm(enumerate(minibatch_generator_shuffled(word_idxs))):
        X, y = batch
        loss = model.train_on_batch(X, y)
    print('Loss:', loss)
    vectors = model.get_weights()[0]
    most_sim_words = {}
    for w in test_words:
        most_sim_words[w] = most_sim_vecs(vectors[word2idx[w]], vectors)
        most_sim_words[w] = list(map(idx2word.get, most_sim_words[w]))
        print(w, most_sim_words[w])

0it [00:00, ?it/s]

Epoch 1/10


3277it [14:08,  3.86it/s]


Loss: 5.918920040130615
he ['she', 'they', 'nobody', 'we', 'i', 'florence', 'just', 'exactly', 'it', 'far']
she ['he', 'nobody', 'they', 'we', 'florence', 'i', 'just', 'edith', 'it', 'exactly']
paris ['parliament', 'dint', 'vain', 'france', 'coketown', 'england', 'humanity', 'despair', 'tiptoe', 'order']
london ['england', 'hers', 'sorts', 'pieces', 'themselves', 'degrees', 'kinds', 'france', 'intervals', 'paris']
table ['fire', 'ground', 'floor', 'road', 'wall', 'river', 'gate', 'door', 'window', 'pavement']
rare ['improvement', 'tiger', 'amusement', 'employment', 'exertion', 'instruction', 'hesitation', 'softened', 'calculation', 'dislike']
monday ['tiptoe', 'custody', 'lincolnshire', 'events', 'corners', 'peeping', 'amazement', 'attendance', 'salem', 'earth']
sunday ['summer', 'nuns', 'dutch', 'dreary', 'saturday', 'sombre', 'identical', 'fatal', 'winter', 'quarters']
man ['gentleman', 'woman', 'lady', 'person', 'creature', 'girl', 'moment', 'fine', 'thing', 'dog']
woman ['man', 'ge

0it [00:00, ?it/s]

girl ['boy', 'child', 'lad', 'servant', 'dwarf', 'major', 'stranger', 'jew', 'manager', 'lady']
Epoch 2/10


3277it [13:08,  4.16it/s]


Loss: 5.889798641204834
he ['she', 'nobody', 'it', 'they', 'herbert', 'nicholas', 'i', 'monks', 'newman', 'florence']
she ['he', 'nobody', 'florence', 'they', 'herbert', 'i', 'it', 'edith', 'estella', 'nicholas']
paris ['england', 'france', 'parliament', 'coketown', 'india', 'despair', 'london', 'yorkshire', 'succession', 'custody']
london ['england', 'paris', 'france', 'india', 'ourselves', 'cloisterham', 'yorkshire', 'tiptoe', 'holborn', 'vain']
table ['ground', 'fire', 'wall', 'road', 'sofa', 'pavement', 'window', 'hearth', 'board', 'floor']
rare ['desperate', 'severe', 'disinterested', 'buff', 'trifling', 'shrewd', 'sullen', 'comical', 'careless', 'sober']
monday ['noon', 'entering', 'tiptoe', 'saturday', 'peeping', 'thursday', 'midnight', 'horseback', 'arriving', 'dover']
sunday ['saturday', 'summer', 'day', 'winter', 'shady', 'fatal', 'post', 'coffee', 'sombre', 'memorable']
man ['gentleman', 'woman', 'lady', 'chap', 'person', 'ooman', 'soldier', 'girl', 'dog', 'fellow']
woman ['

0it [00:00, ?it/s]

girl ['boy', 'child', 'fellow', 'woman', 'lady', 'creature', 'servant', 'magistrate', 'ooman', 'gentleman']
Epoch 3/10


3277it [12:57,  4.21it/s]


Loss: 5.750729084014893
he ['she', 'it', 'they', 'nobody', 'i', 'nicholas', 'herbert', 'newman', 'monks', 'we']
she ['he', 'florence', 'nobody', 'they', 'i', 'it', 'estella', 'herbert', 'edith', 'everybody']
paris ['england', 'france', 'india', 'london', 'yorkshire', 'coketown', 'parliament', 'holborn', 'despair', 'dover']
london ['england', 'paris', 'france', 'india', 'holborn', 'yorkshire', 'canterbury', 'coketown', 'cloisterham', 'dover']
table ['ground', 'sofa', 'wall', 'road', 'hearth', 'counter', 'pavement', 'board', 'fire', 'box']
rare ['singular', 'desperate', 'terrible', 'mighty', 'careless', 'trifling', 'delicious', 'tender', 'faint', 'special']
monday ['saturday', 'thursday', 'wednesday', 'noon', 'sunday', 'entering', 'tiptoe', 'horseback', 'tuesday', 'midnight']
sunday ['saturday', 'summer', 'monday', 'christmas', 'day', 'winter', 'wednesday', 'stage', 'week', 'post']
man ['gentleman', 'woman', 'lady', 'person', 'ooman', 'chap', 'soldier', 'dog', 'girl', 'bailey']
woman ['l

0it [00:00, ?it/s]

girl ['boy', 'fellow', 'child', 'woman', 'creature', 'lady', 'creetur', 'feller', 'nurse', 'ooman']
Epoch 4/10


3277it [12:55,  4.22it/s]


Loss: 5.784028053283691
he ['she', 'it', 'they', 'nobody', 'i', 'nicholas', 'oliver', 'herbert', 'we', 'newman']
she ['he', 'florence', 'they', 'it', 'nobody', 'i', 'estella', 'herbert', 'edith', 'everybody']
paris ['england', 'france', 'india', 'london', 'coketown', 'parliament', 'holborn', 'yorkshire', 'newgate', 'dover']
london ['england', 'paris', 'france', 'india', 'holborn', 'canterbury', 'yorkshire', 'town', 'dover', 'coketown']
table ['counter', 'ground', 'sofa', 'wall', 'road', 'hearth', 'pavement', 'box', 'board', 'desk']
rare ['delicious', 'tender', 'capital', 'terrible', 'singular', 'careless', 'desperate', 'treacherous', 'faint', 'sturdy']
monday ['thursday', 'saturday', 'wednesday', 'sunday', 'friday', 'tiptoe', 'noon', 'horseback', 'entering', 'tuesday']
sunday ['saturday', 'monday', 'summer', 'christmas', 'wednesday', 'day', 'winter', 'stage', 'week', 'previous']
man ['gentleman', 'woman', 'lady', 'person', 'chap', 'soldier', 'ooman', 'dog', 'barnacle', 'blight']
woman 

0it [00:00, ?it/s]

girl ['boy', 'fellow', 'child', 'woman', 'creature', 'creetur', 'nurse', 'lady', 'servant', 'feller']
Epoch 5/10


3277it [15:26,  3.54it/s]


Loss: 5.697870254516602
he ['she', 'it', 'they', 'nicholas', 'i', 'oliver', 'nobody', 'we', 'herbert', 'reciprocal']
she ['he', 'they', 'florence', 'it', 'i', 'nobody', 'estella', 'nicholas', 'we', 'reciprocal']
paris ['england', 'india', 'london', 'france', 'coketown', 'yorkshire', 'parliament', 'newgate', 'holborn', 'dover']
london ['paris', 'england', 'india', 'france', 'canterbury', 'holborn', 'town', 'yorkshire', 'yarmouth', 'cloisterham']
table ['counter', 'ground', 'sofa', 'road', 'box', 'wall', 'pavement', 'desk', 'staircase', 'hearth']
rare ['delicious', 'treacherous', 'pious', 'tender', 'terrible', 'capital', 'queer', 'faint', 'singular', 'genteel']
monday ['thursday', 'wednesday', 'saturday', 'sunday', 'friday', 'noon', 'horseback', 'tiptoe', 'tuesday', 'entering']
sunday ['saturday', 'monday', 'summer', 'wednesday', 'christmas', 'winter', 'day', 'stage', 'week', 'thursday']
man ['gentleman', 'woman', 'person', 'lady', 'barnacle', 'soldier', 'chap', 'dog', 'ooman', 'blight']

0it [00:00, ?it/s]

girl ['child', 'boy', 'creature', 'fellow', 'woman', 'creetur', 'nurse', 'servant', 'baby', 'lady']
Epoch 6/10


3277it [16:43,  3.27it/s]


Loss: 5.579746246337891
he ['she', 'it', 'they', 'nicholas', 'oliver', 'i', 'we', 'nobody', 'him', 'himself']
she ['he', 'they', 'it', 'florence', 'i', 'nicholas', 'nobody', 'oliver', 'we', 'dora']
paris ['india', 'england', 'london', 'france', 'newgate', 'dover', 'marseilles', 'canterbury', 'rome', 'holborn']
london ['paris', 'england', 'india', 'france', 'canterbury', 'holborn', 'town', 'cloisterham', 'dover', 'oxford']
table ['counter', 'ground', 'sofa', 'box', 'road', 'staircase', 'desk', 'piano', 'pavement', 'fire']
rare ['treacherous', 'delicious', 'pious', 'queer', 'tender', 'fearful', 'thriving', 'genteel', 'faint', 'terrible']
monday ['thursday', 'wednesday', 'sunday', 'saturday', 'friday', 'noon', 'tuesday', 'tomorrow', 'morrow', 'horseback']
sunday ['saturday', 'monday', 'summer', 'wednesday', 'winter', 'christmas', 'thursday', 'tuesday', 'friday', 'day']
man ['gentleman', 'woman', 'person', 'lady', 'barnacle', 'dog', 'boy', 'soldier', 'ooman', 'chap']
woman ['man', 'creatur

0it [00:00, ?it/s]

girl ['child', 'creature', 'boy', 'woman', 'fellow', 'creetur', 'nurse', 'servant', 'wretch', 'baby']
Epoch 7/10


3277it [15:36,  3.50it/s]


Loss: 5.814074516296387
he ['she', 'it', 'they', 'oliver', 'nicholas', 'i', 'himself', 'him', 'we', 'who']
she ['he', 'they', 'it', 'florence', 'i', 'nicholas', 'oliver', 'we', 'nobody', 'bella']
paris ['london', 'india', 'england', 'france', 'dover', 'marseilles', 'canterbury', 'rome', 'newgate', 'holborn']
london ['paris', 'england', 'india', 'canterbury', 'town', 'france', 'holborn', 'oxford', 'dover', 'cloisterham']
table ['counter', 'ground', 'box', 'staircase', 'sofa', 'road', 'piano', 'fire', 'desk', 'finger']
rare ['treacherous', 'delicious', 'pious', 'queer', 'comical', 'tender', 'fearful', 'thriving', 'skilful', 'disinterested']
monday ['thursday', 'wednesday', 'sunday', 'saturday', 'friday', 'noon', 'tomorrow', 'morrow', 'tuesday', 'saturdays']
sunday ['saturday', 'monday', 'summer', 'wednesday', 'winter', 'christmas', 'thursday', 'tuesday', 'friday', 'autumn']
man ['gentleman', 'woman', 'person', 'lady', 'barnacle', 'dog', 'boy', 'creature', 'footman', 'jerry']
woman ['man'

0it [00:00, ?it/s]

girl ['child', 'creature', 'woman', 'boy', 'fellow', 'creetur', 'wretch', 'servant', 'nurse', 'baby']
Epoch 8/10


3277it [14:19,  3.81it/s]


Loss: 5.654291152954102
he ['she', 'it', 'they', 'oliver', 'nicholas', 'i', 'himself', 'who', 'him', 'then']
she ['he', 'they', 'it', 'i', 'florence', 'nicholas', 'oliver', 'we', 'him', 'bella']
paris ['india', 'london', 'england', 'canterbury', 'marseilles', 'dover', 'france', 'newgate', 'rome', 'venice']
london ['paris', 'england', 'india', 'town', 'canterbury', 'dover', 'cloisterham', 'france', 'oxford', 'yorkshire']
table ['counter', 'ground', 'box', 'piano', 'staircase', 'fire', 'desk', 'road', 'sofa', 'ladder']
rare ['treacherous', 'pious', 'delicious', 'queer', 'comical', 'skilful', 'fearful', 'faint', 'severe', 'tender']
monday ['thursday', 'wednesday', 'sunday', 'saturday', 'friday', 'noon', 'morrow', 'tomorrow', 'tuesday', 'yesterday']
sunday ['saturday', 'monday', 'summer', 'wednesday', 'winter', 'autumn', 'friday', 'tuesday', 'christmas', 'thursday']
man ['gentleman', 'woman', 'person', 'lady', 'barnacle', 'dog', 'creature', 'boy', 'footman', 'jerry']
woman ['man', 'creatur

0it [00:00, ?it/s]

girl ['creature', 'child', 'woman', 'boy', 'fellow', 'creetur', 'wretch', 'nurse', 'servant', 'baby']
Epoch 9/10


3277it [18:03,  3.02it/s]


Loss: 5.590196132659912
he ['she', 'it', 'they', 'himself', 'oliver', 'i', 'nicholas', 'who', 'him', 'then']
she ['he', 'they', 'it', 'i', 'nicholas', 'florence', 'oliver', 'we', 'him', 'almost']
paris ['india', 'london', 'england', 'canterbury', 'marseilles', 'dover', 'newgate', 'rome', 'venice', 'france']
london ['paris', 'england', 'india', 'town', 'dover', 'canterbury', 'cloisterham', 'yorkshire', 'france', 'newgate']
table ['counter', 'piano', 'ground', 'staircase', 'desk', 'box', 'fire', 'sofa', 'road', 'ladder']
rare ['delicious', 'pious', 'comical', 'treacherous', 'skilful', 'queer', 'faint', 'fearful', 'severe', 'pleasant']
monday ['thursday', 'sunday', 'wednesday', 'saturday', 'friday', 'noon', 'tomorrow', 'morrow', 'tuesday', 'yesterday']
sunday ['saturday', 'monday', 'summer', 'wednesday', 'autumn', 'winter', 'tuesday', 'friday', 'thursday', 'march']
man ['gentleman', 'woman', 'person', 'lady', 'barnacle', 'dog', 'boy', 'creature', 'jerry', 'footman']
woman ['man', 'creatur

0it [00:00, ?it/s]

girl ['child', 'creature', 'woman', 'boy', 'fellow', 'wretch', 'creetur', 'nurse', 'servant', 'schoolmaster']
Epoch 10/10


3277it [15:43,  3.47it/s]


Loss: 5.671951770782471
he ['she', 'it', 'they', 'himself', 'i', 'who', 'oliver', 'him', 'nicholas', 'then']
she ['he', 'they', 'it', 'i', 'nicholas', 'oliver', 'florence', 'him', 'we', 'again']
paris ['london', 'india', 'england', 'canterbury', 'marseilles', 'dover', 'newgate', 'rome', 'venice', 'yarmouth']
london ['paris', 'england', 'india', 'town', 'dover', 'canterbury', 'cloisterham', 'yorkshire', 'france', 'church']
table ['counter', 'piano', 'desk', 'staircase', 'ground', 'sofa', 'box', 'trunk', 'fire', 'lid']
rare ['severe', 'comical', 'delicious', 'queer', 'pious', 'faint', 'skilful', 'pleasant', 'sincere', 'fearful']
monday ['thursday', 'sunday', 'saturday', 'wednesday', 'friday', 'noon', 'morrow', 'tomorrow', 'tuesday', 'yesterday']
sunday ['saturday', 'monday', 'summer', 'wednesday', 'autumn', 'winter', 'wintry', 'friday', 'tuesday', 'march']
man ['gentleman', 'woman', 'person', 'lady', 'barnacle', 'dog', 'boy', 'jerry', 'creature', 'fellow']
woman ['man', 'creature', 'girl

In [None]:
# vectors[:5]

In [None]:
"""for word, i in word2idx.items():
    str_vec = ' '.join(map(str, list(vectors[i, :])))
    print('{} {}\n'.format(word, str_vec))"""

In [None]:
"""
GlobalAveragePooling1D
nonshuffled
Loss: 5.689522743225098
he ['he', 'she', 'it', 'i', 'that', 'they', 'then', 'who', 'him', 'and']
she ['she', 'he', 'it', 'they', 'i', 'that', 'who', 'there', 'then', 'him']
paris ['paris', 'yarmouth', 'france', 'italy', 'dover', 'marseilles', 'print', 'newgate', 'venice', 'requisition']
london ['london', 'england', 'india', 'paris', 'town', 'france', 'dover', 'yarmouth', 'cloisterham', 'holborn']
table ['table', 'ground', 'counter', 'finger', 'sofa', 'box', 'desk', 'pavement', 'floor', 'staircase']
rare ['rare', 'skilful', 'pious', 'capital', 'decent', 'lively', 'gentlemanly', 'thriving', 'romantic', 'scientific']
monday ['monday', 'thursday', 'saturday', 'wednesday', 'sunday', 'tuesday', 'friday', 'noon', 'horseback', 'morrow']
sunday ['sunday', 'saturday', 'monday', 'summer', 'wednesday', 'day', 'winter', 'evening', 'thursday', 'friday']
man ['man', 'gentleman', 'woman', 'lady', 'person', 'fellow', 'creature', 'boy', 'friend', 'chap']
woman ['woman', 'lady', 'man', 'creature', 'gentleman', 'girl', 'fellow', 'boy', 'person', 'chap']
king ['king', 'pastry', 'stroller', 'horse', 'cook', 'chandler', 'knights', 'roc', 'bill', 'cathedral']
queen ['queen', 'giant', 'blade', 'poet', 'jewel', 'proctor', 'band', 'livelihood', 'infection', 'nosegay']
boy ['boy', 'girl', 'child', 'fellow', 'creature', 'woman', 'lady', 'man', 'creetur', 'friend']
girl ['girl', 'creature', 'boy', 'fellow', 'woman', 'child', 'creetur', 'lady', 'wretch', 'man']

vectors[:5]"""

In [None]:
"""Loss: 5.9238457679748535 (colab)
sum
nonshuffled
he ['he', 'she', 'they', 'it', 'who', 'nicholas', 'i', 'then', 'himself', 'never']
she ['she', 'he', 'they', 'it', 'nicholas', 'who', 'never', 'have', 'then', 'i']
paris ['paris', 'france', 'venice', 'rochester', 'pentonville', 'dates', 'boredom', 'switzerland', 'italy', 'england']
london ['london', 'town', 'paris', 'england', 'prison', 'city', 'papers', 'france', 'india', 'pass']
table ['table', 'desk', 'room', 'chair', 'sofa', 'window', 'finger', 'box', 'ground', 'door']
rare ['rare', 'thorough', 'severe', 'gentlemanly', 'doleful', 'smart', 'pious', 'romantic', 'practical', 'trifling']
monday ['monday', 'sunday', 'morrow', 'thursday', 'saturday', 'noon', 'wednesday', 'tomorrow', 'friday', 'ensuing']
sunday ['sunday', 'saturday', 'monday', 'day', 'evening', 'winter', 'summer', 'autumn', 'wednesday', 'visit']
man ['man', 'woman', 'gentleman', 'lady', 'person', 'fellow', 'boy', 'dog', 'creature', 'female']
woman ['woman', 'man', 'lady', 'creature', 'gentleman', 'girl', 'fellow', 'child', 'figure', 'person']
king ['king', 'pastry', 'corn', 'inn', 'chandler', 'court', 'butcher', 'mayor', 'snow', 'saint']
queen ['queen', 'proctor', 'criminal', 'miser', 'pitcher', 'cab', 'radiance', 'lamb', 'bushel', 'cove']
boy ['boy', 'fellow', 'girl', 'friend', 'bill', 'child', 'master', 'man', 'lad', 'dog']
girl ['girl', 'creature', 'child', 'woman', 'boy', 'fellow', 'creetur', 'wretch', 'lady', 'lad']
"""

In [None]:
"""Epoch 10/10 (colab)

3277it [10:09,  5.38it/s]
GlobalAveragePooling1D
nonshuffled
Loss: 6.253807544708252
he ['he', 'she', 'it', 'they', 'and', 'that', 'then', 'himself', 'but', 'who']
she ['she', 'he', 'it', 'they', 'then', 'there', 'him', 'i', 'almost', 'but']
paris ['paris', 'france', 'yorkshire', 'yarmouth', 'newgate', 'italy', 'clerkenwell', 'requisition', 'numbers', 'print']
london ['london', 'england', 'india', 'paris', 'town', 'yorkshire', 'france', 'leadenhall', 'market', 'holborn']
table ['table', 'counter', 'ground', 'desk', 'finger', 'box', 'room', 'sofa', 'hearth', 'lamp']
rare ['rare', 'romantic', 'frightful', 'capital', 'pious', 'mighty', 'slight', 'mysterious', 'judicious', 'smart']
monday ['monday', 'saturday', 'thursday', 'sunday', 'wednesday', 'tuesday', 'friday', 'morrow', 'christmas', 'noon']
sunday ['sunday', 'saturday', 'monday', 'summer', 'day', 'wednesday', 'tuesday', 'winter', 'evening', 'thursday']
man ['man', 'gentleman', 'woman', 'lady', 'boy', 'person', 'fellow', 'creature', 'chap', 'dog']
woman ['woman', 'lady', 'man', 'girl', 'creature', 'gentleman', 'fellow', 'boy', 'chap', 'creetur']
king ['king', 'stroller', 'pastry', 'chandler', 'cook', 'roc', 'dustman', 'mayor', 'merchant', 'ark']
queen ['queen', 'criminal', 'seaman', 'fortification', 'martyr', 'basin', 'poet', 'odour', 'dish', 'tenant']
boy ['boy', 'girl', 'fellow', 'child', 'creature', 'woman', 'man', 'creetur', 'lady', 'gentleman']
girl ['girl', 'creature', 'boy', 'woman', 'fellow', 'child', 'lady', 'creetur', 'wretch', 'man']"""


In [None]:
"""Loss: 6.217106819152832 (colab)
nonshuffled
mean
he ['he', 'she', 'it', 'i', 'and', 'they', 'that', 'who', 'but', 'again']
she ['she', 'he', 'it', 'they', 'i', 'who', 'and', 'that', 'again', 'him']
paris ['paris', 'france', 'yarmouth', 'print', 'germany', 'requisition', 'england', 'newgate', 'india', 'italy']
london ['london', 'england', 'yorkshire', 'india', 'paris', 'holborn', 'town', 'market', 'greta', 'neutral']
table ['table', 'ground', 'board', 'finger', 'counter', 'room', 'chair', 'sofa', 'road', 'fire']
rare ['rare', 'pious', 'musical', 'trifling', 'skilful', 'monstrous', 'judicious', 'brilliant', 'severe', 'subtle']
monday ['monday', 'thursday', 'saturday', 'sunday', 'friday', 'wednesday', 'tuesday', 'morrow', 'succeeding', 'noon']
sunday ['sunday', 'saturday', 'monday', 'summer', 'day', 'friday', 'winter', 'wednesday', 'evening', 'memorable']
man ['man', 'woman', 'gentleman', 'lady', 'person', 'boy', 'fellow', 'dog', 'chap', 'friend']
woman ['woman', 'lady', 'man', 'girl', 'gentleman', 'creature', 'fellow', 'boy', 'person', 'chap']
king ['king', 'pastry', 'chandler', 'merchant', 'lawyer', 'stationer', 'stroller', 'maker', 'clergyman', 'tailor']
queen ['queen', 'giant', 'blade', 'miniature', 'mask', 'decanter', 'band', 'mug', 'whirlwind', 'mission']
boy ['boy', 'girl', 'fellow', 'child', 'woman', 'creature', 'man', 'lady', 'creetur', 'friend']
girl ['girl', 'creature', 'boy', 'woman', 'child', 'fellow', 'lady', 'creetur', 'wretch', 'papa']
"""

In [None]:
"""Loss: 5.463151454925537
shuffled
mean
he ['he', 'she', 'they', 'it', 'who', 'him', 'i', 'then', 'we', 'himself']
she ['she', 'he', 'they', 'it', 'i', 'we', 'him', 'who', 'florence', 'then']
paris ['paris', 'london', 'france', 'england', 'india', 'newgate', 'yorkshire', 'yarmouth', 'canterbury', 'ipswich']
london ['london', 'paris', 'england', 'india', 'dover', 'town', 'france', 'cloisterham', 'yorkshire', 'ipswich']
table ['table', 'ground', 'box', 'desk', 'counter', 'sofa', 'piano', 'staircase', 'fire', 'road']
rare ['rare', 'subtle', 'severe', 'pious', 'trifling', 'feverish', 'skilful', 'delicious', 'gypsy', 'brilliant']
monday ['monday', 'thursday', 'morrow', 'wednesday', 'sunday', 'tomorrow', 'saturday', 'noon', 'friday', 'tuesday']
sunday ['sunday', 'saturday', 'summer', 'monday', 'autumn', 'evening', 'christmas', 'day', 'winter', 'afternoon']
man ['man', 'gentleman', 'woman', 'person', 'lady', 'dog', 'boy', 'creature', 'female', 'fellow']
woman ['woman', 'man', 'girl', 'creature', 'lady', 'gentleman', 'person', 'mistress', 'fellow', 'boy']
king ['king', 'corn', 'pastry', 'bride', 'glare', 'gallery', 'usurer', 'chateau', 'list', 'broker']
queen ['queen', 'dish', 'corn', 'band', 'manuscript', 'porter', 'glare', 'king', 'milk', 'forest']
boy ['boy', 'girl', 'child', 'fellow', 'man', 'creature', 'woman', 'bill', 'clerk', 'doctor']
girl ['girl', 'child', 'woman', 'creature', 'boy', 'fellow', 'wretch', 'baby', 'creetur', 'man']

"""

In [None]:
"""
Loss: 5.788453102111816
mac
shuffled
GlobalAveragePooling1D
he ['he', 'she', 'they', 'it', 'be', 'that', 'i', 'who', 'himself', 'but']
she ['she', 'he', 'they', 'it', 'i', 'be', 'almost', 'florence', 'then', 'have']
paris ['paris', 'england', 'london', 'yarmouth', 'newgate', 'coketown', 'rome', 'rochester', 'france', 'blunderstone']
london ['london', 'paris', 'england', 'town', 'india', 'dover', 'canterbury', 'rome', 'yarmouth', 'lodge']
table ['table', 'desk', 'counter', 'box', 'sofa', 'ground', 'hearth', 'staircase', 'piano', 'coverlet']
rare ['rare', 'pious', 'singular', 'queer', 'comical', 'romantic', 'thriving', 'liberal', 'fearful', 'ridiculous']
monday ['monday', 'sunday', 'thursday', 'saturday', 'noon', 'wednesday', 'tuesday', 'friday', 'morrow', 'tomorrow']
sunday ['sunday', 'monday', 'saturday', 'summer', 'autumn', 'wednesday', 'march', 'christmas', 'friday', 'tuesday']
man ['man', 'gentleman', 'woman', 'person', 'lady', 'boy', 'creature', 'dog', 'fellow', 'female']
woman ['woman', 'man', 'girl', 'lady', 'creature', 'gentleman', 'fellow', 'person', 'child', 'boy']
king ['king', 'pastry', 'tailor', 'bride', 'clerk', 'ghost', 'clergyman', 'mob', 'shops', 'animal']
queen ['queen', 'sheep', 'king', 'fish', 'basin', 'flame', 'professor', 'palace', 'beer', 'ship']
boy ['boy', 'girl', 'child', 'fellow', 'man', 'woman', 'creature', 'gentleman', 'lad', 'dog']
girl ['girl', 'child', 'woman', 'creature', 'boy', 'fellow', 'wretch', 'schoolmaster', 'nurse', 'matron']
"""

In [None]:
"""Loss: 5.444209098815918
mac
shuffled
GlobalAveragePooling1D
he ['she', 'it', 'they', 'i', 'him', 'them', 'who', 'there', 'himself', 'that']
she ['he', 'it', 'they', 'i', 'there', 'we', 'them', 'him', 'nicholas', 'who']
paris ['london', 'england', 'yarmouth', 'france', 'italy', 'yorkshire', 'india', 'coketown', 'newgate', 'canterbury']
london ['paris', 'india', 'england', 'town', 'france', 'dover', 'newgate', 'cloisterham', 'yarmouth', 'canterbury']
table ['ground', 'counter', 'sofa', 'desk', 'box', 'piano', 'lawn', 'lid', 'staircase', 'road']
rare ['funny', 'skilful', 'ridiculous', 'romantic', 'pious', 'fearful', 'comical', 'genteel', 'careless', 'faint']
monday ['saturday', 'sunday', 'wednesday', 'thursday', 'friday', 'morrow', 'noon', 'tuesday', 'tomorrow', 'next']
sunday ['saturday', 'monday', 'summer', 'autumn', 'wednesday', 'evening', 'winter', 'march', 'windy', 'night']
man ['gentleman', 'woman', 'lady', 'person', 'dog', 'boy', 'creature', 'female', 'fellow', 'personage']
woman ['man', 'girl', 'lady', 'creature', 'gentleman', 'fellow', 'boy', 'child', 'person', 'wretch']
king ['auctioneer', 'pastry', 'clergyman', 'merchant', 'bride', 'tailor', 'greengrocer', 'broker', 'marshal', 'committee']
queen ['professor', 'king', 'ship', 'list', 'procession', 'mansion', 'committee', 'field', 'basin', 'sheep']
boy ['girl', 'child', 'fellow', 'creature', 'woman', 'bill', 'lad', 'man', 'wretch', 'dog']
girl ['child', 'woman', 'creature', 'boy', 'fellow', 'wretch', 'widow', 'nurse', 'baby', 'creetur']"""

In [None]:
"""
Loss: 5.555170059204102
mac
shuffled
mean
he ['she', 'it', 'they', 'who', 'i', 'nicholas', 'that', 'himself', 'but', 'now']
she ['he', 'it', 'they', 'florence', 'nicholas', 'bella', 'i', 'rosa', 'kit', 'everybody']
paris ['london', 'france', 'england', 'newgate', 'india', 'yarmouth', 'dover', 'yorkshire', 'canterbury', 'italy']
london ['paris', 'england', 'france', 'town', 'india', 'clerkenwell', 'yarmouth', 'canterbury', 'cloisterham', 'newgate']
table ['sofa', 'counter', 'desk', 'ground', 'piano', 'box', 'staircase', 'sideboard', 'lawn', 'packet']
rare ['queer', 'severe', 'skilful', 'desperate', 'comical', 'musical', 'treacherous', 'restless', 'ridiculous', 'pious']
monday ['wednesday', 'thursday', 'sunday', 'saturday', 'noon', 'tuesday', 'friday', 'morrow', 'tomorrow', 'succeeding']
sunday ['saturday', 'monday', 'summer', 'winter', 'autumn', 'day', 'wednesday', 'evening', 'windy', 'succeeding']
man ['gentleman', 'woman', 'person', 'lady', 'creature', 'boy', 'dog', 'female', 'fellow', 'barnacle']
woman ['man', 'girl', 'creature', 'lady', 'gentleman', 'fellow', 'child', 'boy', 'wretch', 'person']
king ['pastry', 'marshal', 'clerk', 'palace', 'greengrocer', 'bride', 'corn', 'clergyman', 'tailor', 'queen']
queen ['king', 'jewel', 'palace', 'committee', 'field', 'basin', 'vessel', 'mansion', 'ship', 'sheep']
boy ['girl', 'child', 'creature', 'woman', 'fellow', 'man', 'gentleman', 'lad', 'schoolmaster', 'officer']
girl ['child', 'creature', 'woman', 'boy', 'fellow', 'wretch', 'creetur', 'lad', 'patient', 'schoolmaster']


"""

In [None]:
"""Loss: 5.690605640411377
mac
nonshuffled
mean
he ['she', 'it', 'but', 'that', 'who', 'and', 'him', 'i', 'then', 'they']
she ['he', 'it', 'who', 'they', 'i', 'but', 'that', 'there', 'him', 'then']
paris ['france', 'yarmouth', 'pentonville', 'print', 'england', 'newgate', 'india', 'yorkshire', 'italy', 'requisition']
london ['england', 'india', 'paris', 'town', 'france', 'print', 'yorkshire', 'market', 'cloisterham', 'chancery']
table ['ground', 'counter', 'finger', 'carpet', 'box', 'desk', 'piano', 'room', 'sofa', 'chimneypiece']
rare ['comical', 'severe', 'plentiful', 'musical', 'sincere', 'shrewd', 'reddish', 'vague', 'grand', 'feverish']
monday ['saturday', 'sunday', 'thursday', 'wednesday', 'tuesday', 'morrow', 'friday', 'noon', 'tomorrow', 'previous']
sunday ['saturday', 'monday', 'summer', 'day', 'wednesday', 'winter', 'tuesday', 'night', 'thursday', 'christmas']
man ['gentleman', 'woman', 'lady', 'person', 'boy', 'fellow', 'friend', 'dog', 'female', 'chap']
woman ['lady', 'man', 'gentleman', 'girl', 'creature', 'boy', 'fellow', 'ooman', 'chap', 'person']
king ['pastry', 'knights', 'recorder', 'jeweller', 'stroller', 'cathedral', 'chandler', 'ark', 'symond', 'breadth']
queen ['hue', 'band', 'committee', 'jar', 'fortification', 'blade', 'traitor', 'giant', 'dissolution', 'gradual']
boy ['girl', 'child', 'fellow', 'woman', 'lady', 'man', 'creature', 'friend', 'gentleman', 'creetur']
girl ['creature', 'boy', 'woman', 'child', 'fellow', 'lady', 'creetur', 'wretch', 'papa', 'man']"""

In [None]:
"""
Loss: 5.6113481521606445
mac
shuffled
mean
he ['she', 'it', 'they', 'who', 'himself', 'him', 'then', 'oliver', 'almost', 'that']
she ['he', 'they', 'it', 'florence', 'nicholas', 'i', 'herself', 'then', 'him', 'oliver']
paris ['london', 'france', 'england', 'india', 'newgate', 'dover', 'venice', 'rome', 'italy', 'yorkshire']
london ['paris', 'india', 'england', 'town', 'france', 'dover', 'canterbury', 'cloisterham', 'yorkshire', 'chertsey']
table ['counter', 'sofa', 'desk', 'ground', 'staircase', 'lid', 'chimneypiece', 'hearth', 'box', 'piano']
rare ['terrible', 'faint', 'severe', 'vivid', 'disinterested', 'treacherous', 'fierce', 'delicious', 'powerful', 'genteel']
monday ['sunday', 'wednesday', 'thursday', 'saturday', 'friday', 'noon', 'tomorrow', 'tuesday', 'morrow', 'next']
sunday ['saturday', 'monday', 'wednesday', 'summer', 'autumn', 'christmas', 'day', 'winter', 'evening', 'tuesday']
man ['gentleman', 'woman', 'person', 'lady', 'creature', 'dog', 'fellow', 'boy', 'female', 'men']
woman ['man', 'creature', 'girl', 'gentleman', 'lady', 'fellow', 'boy', 'person', 'nurse', 'child']
king ['auctioneer', 'horn', 'corn', 'animal', 'committee', 'pastry', 'merchant', 'bride', 'queen', 'richest']
queen ['system', 'king', 'basin', 'ship', 'circle', 'dish', 'pudding', 'committee', 'flower', 'list']
boy ['girl', 'child', 'fellow', 'creature', 'woman', 'man', 'lad', 'gentleman', 'schoolmaster', 'officer']
girl ['creature', 'child', 'woman', 'boy', 'fellow', 'nurse', 'creetur', 'wretch', 'baby', 'lad']"""