In [None]:
import time
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
from tqdm import tqdm
import pickle
import random
import numpy as np
from collections import Counter, defaultdict
import numpy as np
from torch import FloatTensor as FT

### Instructions
For this part, fill in the required code and make the notebook work. This will be very similar to the Skip-Gram model, but a little more difficult. Look for the """ FILL IN """ string to guide you.

In [None]:
# Where the job runs. Prefer CUDA (Linux / Colab GPU runtimes), then Apple's
# Metal backend (MPS), and fall back to the CPU when neither is available.
if torch.cuda.is_available():
    DEVICE = "cuda"
elif torch.backends.mps.is_available():
    DEVICE = "mps"
else:
    DEVICE = "cpu"

# Seed every random number generator the notebook draws from (subsampling uses
# `random`, weight init / shuffling / negative sampling use `torch`) so that a
# Restart & Run All reproduces the same run.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# The batch size in Adam or SGD
BATCH_SIZE = 512

# Number of epochs
NUM_EPOCHS = 10

# Number of words taken on each side of the center word.
# Given a window of continuous text like ["a", "b", "c"] (each is a word):
#   - Skip-Gram predicts each of the outer words "a" and "c" from the center word "b".
#   - CBOW goes the other way: it uses the outer words ["a", "c"] to predict "b".
WINDOW = 1

# Number of negative samples drawn per positive example.
K = 4

The text8 Wikipedia corpus. 100M characters.

In [None]:
from google.colab import drive
drive.mount('/content/drive')

!du -h text8

f = open('/content/drive/MyDrive/text8/text8', 'r')
text = f.read()
# One big string of size 100M
print(len(text))

100000000


In [None]:
punc = '!"#$%&()*+,-./:;<=>?@[\\]^_\'{|}~\t\n'

# Replace every character listed in `punc` with a space.
# Strings are immutable, so str.replace / str.translate return a NEW string;
# the result has to be assigned back to `text` or the cleaning is silently lost.
# A single translate() pass also avoids scanning the 100M-character corpus once
# per punctuation character.
text = text.translate(str.maketrans(punc, ' ' * len(punc)))

In [None]:
# A very crude tokenizer you get for free: lower case and split on whitespace.
# It is a plain function, so no tokenizer library has to be installed.
def TOKENIZER(raw_text):
    """Split `raw_text` into lower-cased words.

    Args:
        raw_text: the text to tokenize, as a single string.

    Returns:
        A list with one entry per whitespace-separated word, in the order the
        words appear. str.split() with no argument already strips each word and
        drops empty strings, so runs of spaces, tabs or newlines never produce
        empty tokens. An empty (or all-whitespace) string yields [].
    """
    return raw_text.lower().split()

In [None]:
# Tokenize the whole corpus into an ordered list of words.
words = TOKENIZER(text)
# Word -> number of occurrences in `words`. Note that the name `f` is reused
# here for a Counter and is rebound again by later cells.
f = Counter(words)
# Template marker: the two statements above rely on TOKENIZER being a working
# callable that maps a string to a list of words.
""" FILL IN """

In [None]:
len(words)

17005207

In [None]:
# Do a very crude filter on the text which removes all very rare words (those seen 5 times or fewer)
text = [word for word in words if f[word] > 5]

In [None]:
text[0:5]

['anarchism', 'originated', 'as', 'a', 'term']

In [None]:
class _Vocab:
    """Minimal frequency-ordered vocabulary.

    Offers only the operations this notebook uses on VOCAB: len(), word lookup,
    get_stoi() and get_itos().
    """

    def __init__(self, counts):
        # Most frequent word gets id 0. Ties are broken alphabetically so the
        # word -> id assignment is deterministic.
        self._itos = sorted(counts, key=lambda word: (-counts[word], word))
        self._stoi = {word: index for index, word in enumerate(self._itos)}

    def __len__(self):
        return len(self._itos)

    def __contains__(self, word):
        return word in self._stoi

    def __getitem__(self, word):
        # Raises KeyError for a word that is not in the vocabulary.
        return self._stoi[word]

    def get_stoi(self):
        """Return a word -> id dict (a copy, safe to mutate)."""
        return dict(self._stoi)

    def get_itos(self):
        """Return the id -> word list (a copy, safe to mutate)."""
        return list(self._itos)


def build_vocab_from_iterator(iterator):
    """Build a _Vocab from an iterable of token sequences.

    Args:
        iterator: an iterable whose items are themselves iterables of tokens,
            e.g. [list_of_words].

    Returns:
        A _Vocab whose ids follow descending token frequency.
    """
    counts = Counter()
    for tokens in iterator:
        counts.update(tokens)
    return _Vocab(counts)


# Rebuild the vocabulary from the rare-word-filtered `text` above.
VOCAB = build_vocab_from_iterator([text])

In [None]:
# Populate the maps by hand from the word counts.
# Ids follow descending frequency in `text`, with ties broken alphabetically,
# so the most frequent word gets id 0 and the assignment is deterministic.
_word_counts = Counter(text)

# int -> word map (a list indexed by id)
itos = sorted(_word_counts, key=lambda word: (-_word_counts[word], word))
# word -> int hash map
stoi = {word: index for index, word in enumerate(itos)}

In [None]:
stoi['as']

11

In [None]:
# Total number of words; you should see about 63K as below
len(stoi)

63641

In [None]:
# Count every word of the filtered corpus ...
f = Counter(text)
# ... and turn each count into the word's share of the corpus, i.e. the
# probability of hitting that word when picking a random position in `text`.
z = {word: count / len(text) for word, count in f.items()}

In [None]:
# NOTE(review): `threshold` is not read by the formula below, which hard-codes
# 0.001 and 0.0001 instead - confirm which constants are intended.
threshold = 1e-5
# Probability that a word is kept while subsampling, as a function of its
# corpus share z[word]. Values above 1 simply mean "always keep".
# This is explained here and slightly different from the paper: http://mccormickml.com/2017/01/11/word2vec-tutorial-part-2-negative-sampling/
p_keep = {}
for word in f:
    share = z[word]
    p_keep[word] = (np.sqrt(share / 0.001) + 1) * (0.0001 / share)

In [None]:
# Subsample the corpus: each occurrence of a word is kept independently with
# probability p_keep[word] (random.random() is in [0, 1), so a p_keep of 1 or
# more always keeps the word).
# Note: at this point train_dataset is still a list of word strings, not ids.
train_dataset = [word for word in text if random.random() < p_keep[word]]

# Rebuild the vocabulary from the subsampled tokens
VOCAB = build_vocab_from_iterator([train_dataset])

In [None]:
len(train_dataset)

7847755

In [None]:
# Repopulate the stoi and itos maps now that subsampling dropped some tokens.
# Ids follow descending frequency in train_dataset, with ties broken
# alphabetically, so the most frequent remaining word gets id 0.
_subsampled_counts = Counter(train_dataset)

# int -> word mapping (a list indexed by id)
itos = sorted(_subsampled_counts, key=lambda word: (-_subsampled_counts[word], word))
# word -> int mapping
stoi = {word: index for index, word in enumerate(itos)}

In [None]:
# The vocabulary size after we do all the filters
len(VOCAB)

63641

In [None]:
# Distribution used to draw negative samples: p[i] is the probability of
# drawing the word with id i.
f = Counter(train_dataset)
p = torch.zeros(len(VOCAB))

# Raising counts to the 0.75 power flattens the distribution: frequent words
# are drawn less often, and rare words more often, than their raw share.
s = sum(np.power(freq, 0.75) for freq in f.values())

for word, freq in f.items():
    p[stoi[word]] = np.power(freq, 0.75) / s

In [None]:
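# Map every token to its integer id.
# Careful: this maps the full filtered corpus `text`, not the subsampled word list built above
# (which only feeds VOCAB and the negative-sampling distribution p). The subsampled list
# (about 7.8M tokens) would give far fewer than the 32579 batches asserted further down.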
train_dataset = [stoi[word] for word in text]

In [None]:
# Builds the positive CBOW examples: context words that were seen around a center word.
def get_tokenized_dataset(dataset, verbose=False, window=1):
    """Turn a token sequence into CBOW training rows.

    For every position that has `window` tokens on BOTH sides, one row is
    produced: the left context tokens, then the right context tokens, then the
    center token last. With window=1 and tokens [a, b, c] the row for b is
    [a, c, b].

    Args:
        dataset: sequence of tokens (e.g. a list of integer word ids); it must
            support len() and slicing.
        verbose: accepted for interface compatibility; currently unused.
        window: number of context tokens taken on each side of the center
            token. Must be at least 1.

    Returns:
        A list of rows, each a list of 2 * window + 1 tokens. Positions too
        close to either end of `dataset` to have a full window are skipped, so
        an input shorter than 2 * window + 1 tokens yields [].

    Raises:
        ValueError: if `window` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be at least 1")

    x_list = []
    m = window
    n_tokens = len(dataset)

    for i, token in enumerate(dataset):
        # Get the left and right tokens (both slices are clipped at the ends)
        start = max(0, i - m)
        left_tokens = dataset[start:i]

        end = min(n_tokens, i + m + 1)
        right_tokens = dataset[i + 1:end]

        # Only keep positions with a complete window on both sides, so every
        # row has the same length and can be stacked into one tensor.
        if len(left_tokens) == len(right_tokens) == m:
            w_context = list(left_tokens) + list(right_tokens)

            wc = token

            # One row like [a, c, b] where b is the center word
            x_list.append(w_context + [wc])

    return x_list

In [None]:
train_x_list = get_tokenized_dataset(train_dataset, verbose=False)

In [None]:
# Cache the rows on disk. The context manager guarantees the file is flushed
# and closed before the next cell reads it back.
with open('train_x_list.pkl', 'wb') as pkl_file:
    pickle.dump(train_x_list, pkl_file)

In [None]:
# Reload the cached rows. Unpickling executes code embedded in the file, so
# only load a 'train_x_list.pkl' that this notebook wrote itself; never
# unpickle a file from an untrusted source.
with open('train_x_list.pkl', 'rb') as pkl_file:
    train_x_list = pickle.load(pkl_file)

In [None]:
# Each row is [left context word, right context word, center word]. All are positive examples (y = +1) by design
train_x_list[:10]

[[5233, 11, 3083],
 [3083, 6, 11],
 [11, 190, 6],
 [6, 1, 190],
 [190, 3133, 1],
 [1, 45, 3133],
 [3133, 59, 45],
 [45, 138, 59],
 [59, 134, 138],
 [138, 741, 134]]

In [None]:
# Sanity check: the number of full batches of BATCH_SIZE = 512 should match the reference run
assert(len(train_x_list) // BATCH_SIZE == 32579)

32579

### Set up the dataloader.

In [None]:
# Serve the training rows in shuffled mini-batches. The rows are converted to
# one index tensor and moved to DEVICE up front, so batches need no transfer.
train_dl = DataLoader(
    dataset=TensorDataset(torch.tensor(train_x_list).to(DEVICE)),
    batch_size=BATCH_SIZE,
    shuffle=True,
)

In [None]:
# Pull a single batch and check it is a full (BATCH_SIZE, 3) block of indices.
# Each item from the loader is a one-element sequence holding the batch tensor.
for xb in train_dl:
    assert xb[0].shape == torch.Size([BATCH_SIZE, 3])
    break

### Words we'll use to assess the quality of the model ...

In [None]:
# Ids of the probe words whose nearest neighbours are printed during training.
valid_ids = torch.tensor(
    [stoi[probe] for probe in ('money', 'lion', 'africa', 'musician', 'dance')]
)

### Get the model.

In [None]:
class CBOWNegativeSampling(nn.Module):
    """CBOW scorer meant to be trained with negative sampling.

    The model holds two embedding tables of shape (vocab_size, embed_dim):
      * A embeds the surrounding (context) words,
      * B embeds the center (target) word.
    The score of a (context, center) example is the dot product between the
    mean of the context embeddings and the center embedding.
    """

    def __init__(self, vocab_size, embed_dim):
        """Create both embedding tables and initialise them uniformly.

        Args:
            vocab_size: number of distinct word ids.
            embed_dim: dimensionality D of every embedding vector.
        """
        super(CBOWNegativeSampling, self).__init__()
        self.A = nn.Embedding(vocab_size, embed_dim)  # Embeddings of the context words (around the center word)
        self.B = nn.Embedding(vocab_size, embed_dim)  # Embeddings of the center word being predicted
        self.init_weights()

    def init_weights(self):
        """Fill both tables with values drawn uniformly from [-0.5, 0.5)."""
        # Is this the best way? Not sure
        initrange = 0.5
        self.A.weight.data.uniform_(-initrange, initrange)
        self.B.weight.data.uniform_(-initrange, initrange)

    def forward(self, x):
        """Score a batch of examples.

        Args:
            x: integer tensor of shape (N, 2m + 1). In every row the first 2m
                columns are context word ids and the LAST column is the center
                word id (m = 1 gives rows of 3 ids).

        Returns:
            Float tensor of shape (N,) holding one logit per row.
        """
        # Context words are the first 2m columns, the center word is the last one:
        # w_context is (N, 2m) while wc is (N,)
        w_context, wc = x[:, :-1], x[:, -1]

        # (N, 2m, D): one embedding per context word
        a = self.A(w_context)

        # (N, D): average over the 2m context words
        a_avg = a.mean(dim=1)

        # (N, D): one embedding per center word
        b = self.B(wc)

        # Dot product between the averaged context vector and the center vector.
        # Summing over the final (embedding) dimension leaves shape (N,).
        logits = (a_avg * b).sum(dim=-1)

        return logits

In [None]:
@torch.no_grad()
def validate_embeddings(
    model,
    valid_ids,
    itos
):
    """Print the nearest neighbours of a few probe words.

    Neighbours are ranked by cosine similarity between rows of the context
    embedding table `model.A`. (Other strategies would be to use the target
    embeddings, or the average of both tables.)

    Args:
        model: object exposing an embedding table as `model.A.weight`, a
            (V, D) tensor with one row per word id.
        valid_ids: ids of the probe words (a tensor or any sequence of ints).
        itos: id -> word lookup; it is indexed with plain Python ints.

    Returns:
        None. For every probe word one line "<word>: <n1>, <n2>, ..." is
        printed with up to 10 neighbours, most similar first, followed by a
        blank separator.
    """
    top_k = 10  # Top k items will be displayed

    # detach() is required: on a CPU model `.cpu()` returns the Parameter
    # itself, which still requires grad and cannot be used freely otherwise.
    embedding_weights = model.A.weight.detach().cpu()

    # Scale every row to unit length so a dot product is a cosine similarity.
    # The clamp avoids a division by zero for an all-zero embedding row.
    row_norms = embedding_weights.norm(dim=1, keepdim=True).clamp_min(1e-12)
    normalized_embeddings = embedding_weights / row_norms

    probe_ids = torch.as_tensor(valid_ids, dtype=torch.long).reshape(-1).cpu()

    # (S, D) @ (D, V) => (S, V): similarity of every probe to every word
    similarity = normalized_embeddings[probe_ids] @ normalized_embeddings.T

    # Exclude each probe word itself by id rather than assuming it always
    # ranks first. No other id is filtered out: id 0 is a regular word.
    similarity[torch.arange(probe_ids.numel()), probe_ids] = float('-inf')

    # Never ask for more neighbours than there are other words.
    n_neighbors = max(0, min(top_k, similarity.shape[1] - 1))
    neighbor_ids = similarity.topk(n_neighbors, dim=1).indices

    # Print the output.
    for word_id, neighbors in zip(probe_ids.tolist(), neighbor_ids.tolist()):
        similar_word_str = ', '.join(itos[j] for j in neighbors)
        print(f"{itos[word_id]}: {similar_word_str}")

    print('\n')

### Set up the model

In [None]:
# Optimisation hyper-parameters: initial SGD learning rate, number of epochs
# (re-assigns the value from the configuration cell; same value, 10) and the
# embedding dimensionality.
LR, NUM_EPOCHS, EMBED_DIM = 10.0, 10, 300

In [None]:
# One embedding row per vocabulary word, placed on the selected device.
model = CBOWNegativeSampling(vocab_size=len(VOCAB), embed_dim=EMBED_DIM)
model = model.to(DEVICE)

# Plain SGD over both embedding tables.
optimizer = torch.optim.SGD(params=model.parameters(), lr=LR)

# StepLR with step_size=1 multiplies the learning rate by gamma (0.1) on every
# scheduler.step() call.
# Is this a good idea?
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

In [None]:
model

In [None]:
validate_embeddings(model, valid_ids, itos)

### Train the model

In [None]:
# NOTE(review): `ratios` is not referenced anywhere else in the visible notebook.
ratios = []

def train(dataloader, model, optimizer, epoch):
    """Run one epoch of CBOW training with negative sampling.

    For every batch of positive rows [context..., center] the loss is

        -log sigmoid(s_pos) - sum over K negatives of log sigmoid(-s_neg)

    averaged over the batch, where s is the model's logit and each negative
    row reuses the positive row's context with a randomly drawn center word.

    Reads these notebook-level names: K (negatives per positive), p
    (negative-sampling distribution over word ids), valid_ids and itos (for
    the periodic nearest-neighbour printout).

    Args:
        dataloader: yields one-element sequences holding an integer tensor of
            shape (N, 2m + 1); must support len().
        model: maps such a tensor to one logit per row.
        optimizer: optimizer over the model's parameters.
        epoch: epoch number, used only in the log line.

    Returns:
        None. The model is updated in place; the running loss and the probe
        words' neighbours are printed every 500 batches.
    """
    model.train()
    total_loss, total_batches = 0.0, 0.0
    log_interval = 500

    # total= lets tqdm show progress against the known number of batches
    for idx, x_batch in tqdm(enumerate(dataloader), total=len(dataloader)):

        x_batch = x_batch[0]

        batch_size = x_batch.shape[0]

        # Zero the gradient so they don't accumulate
        optimizer.zero_grad()

        # One logit per positive row; flattened so the shape is (N,) even if
        # the model returns (N, 1)
        logits = model(x_batch).reshape(-1)

        # Positive samples loss, -log sigmoid(s): (N,)
        positive_loss = -torch.nn.functional.logsigmoid(logits)

        # For each batch, draw batch_size * K negative center words from p
        # (with replacement) and move them next to the batch: (N * K,)
        negative_samples = torch.multinomial(
            p, batch_size * K, replacement=True
        ).to(x_batch.device)

        # Context words are every column but the last: (N, 2m)
        w_context = x_batch[:, :-1]

        # Repeat every context row K times back to back, so rows
        # [(a, b), (c, d)] with K = 2 become [(a, b), (a, b), (c, d), (c, d)].
        # This is (N * K, 2m).
        w_context = w_context.repeat_interleave(K, dim=0)

        # One sampled center word per repeated context row: (N * K, 1)
        wc = negative_samples.reshape(-1, 1)

        # Negative rows, same layout as the positive ones: (N * K, 2m + 1).
        # Both parts already live on the batch's device.
        x_batch_negative = torch.cat([w_context, wc], dim=1)

        # All targets are 0 for negative samples, so BCE-with-logits reduces
        # to -log sigmoid(-s). Reshape (N * K,) -> (N, K) and sum the K
        # negatives belonging to each positive row: (N,)
        negative_logits = model(x_batch_negative).reshape(batch_size, K)
        negative_loss = -torch.nn.functional.logsigmoid(-negative_logits).sum(dim=1)

        # Average over the batch
        loss = (positive_loss + negative_loss).mean()

        # Get the gradients via back propagation
        loss.backward()

        # Clip the gradients? Generally a good idea
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)

        # Do an optimization step. Update the parameters A and B
        optimizer.step()

        # Accumulate the loss as a Python float (does not keep the graph alive)
        total_loss += loss.item()

        # Update the batch count
        total_batches += 1

        if idx % log_interval == 0:
            print(
                "| epoch {:3d} | {:5d}/{:5d} batches "
                "| loss {:8.3f} ".format(
                    epoch,
                    idx,
                    len(dataloader),
                    total_loss / total_batches
                )
            )
            validate_embeddings(model, valid_ids, itos)
            # Restart the running average for the next logging window
            total_loss, total_batches = 0.0, 0.0

### Some results from the run look like below:

Somewhere inside of 2 iterations you should get sensible associations.
Paste here a screenshot of the closest vectors.

In [None]:
for epoch in range(1, 1 + NUM_EPOCHS):
    # Wall-clock start of the epoch (recorded, but not reported in this cell)
    epoch_start_time = time.time()

    # One full pass over the training batches
    train(train_dl, model, optimizer, epoch)

    # Advance the learning-rate schedule once per epoch: the scheduler updates
    # the optimizer's learning rate for the next epoch
    scheduler.step()

  w.repeat(K, 1) for w in torch.tensor(w_context).split(1)
1it [00:00,  2.46it/s]

| epoch   1 |     0/32580 batches | loss    4.027 
money: coyotes, furtado, flanking, managed, infogrames, startrek, reyna, damian, larry, pseudonyms
lion: aksai, dumpster, interpolation, mixture, rale, sabor, aphasia, prokaryotes, gametophyte, urology
africa: virginian, wendish, caryophyllales, electricity, warlike, swamp, bombay, catechisms, surrealistic, jacques
musician: lothal, exponent, orsay, awami, screwed, aedui, vassalage, smalltalk, padova, ivo
dance: akhmatova, monty, chunk, bofors, desqview, nephew, emitter, quaternary, recombination, strasser




501it [01:11,  6.33it/s]

| epoch   1 |   500/32580 batches | loss    3.719 
money: coyotes, furtado, flanking, managed, infogrames, startrek, reyna, damian, larry, speedway
lion: aksai, dumpster, interpolation, mixture, rale, sabor, aphasia, prokaryotes, gametophyte, urology
africa: virginian, wendish, caryophyllales, bombay, jacques, electricity, warlike, inactivation, surrealistic, catechisms
musician: lothal, exponent, orsay, awami, screwed, vassalage, aedui, smalltalk, padova, gatt
dance: akhmatova, monty, bofors, chunk, desqview, nephew, emitter, quaternary, recombination, strasser




1001it [02:21,  6.29it/s]

| epoch   1 |  1000/32580 batches | loss    3.363 
money: coyotes, furtado, flanking, managed, reyna, infogrames, startrek, damian, speedway, airfields
lion: aksai, dumpster, mixture, interpolation, rale, sabor, gametophyte, prokaryotes, aphasia, urology
africa: virginian, bombay, wendish, caryophyllales, jacques, electricity, warlike, inactivation, surrealistic, catechisms
musician: lothal, exponent, orsay, awami, screwed, vassalage, smalltalk, aedui, padova, gatt
dance: akhmatova, monty, chunk, bofors, desqview, nephew, emitter, quaternary, recombination, strasser




1501it [03:33,  6.20it/s]

| epoch   1 |  1500/32580 batches | loss    2.968 
money: coyotes, furtado, flanking, managed, reyna, infogrames, startrek, damian, speedway, larry
lion: aksai, dumpster, mixture, interpolation, sabor, rale, gametophyte, prokaryotes, aphasia, krishnamurti
africa: virginian, bombay, jacques, caryophyllales, wendish, electricity, inactivation, warlike, surrealistic, beckwith
musician: lothal, orsay, exponent, awami, vassalage, smalltalk, vestibule, felicity, coherentism, padova
dance: akhmatova, chunk, monty, bofors, desqview, emitter, nephew, quaternary, recombination, strasser




2001it [04:43,  6.10it/s]

| epoch   1 |  2000/32580 batches | loss    2.679 
money: coyotes, furtado, flanking, managed, infogrames, reyna, damian, startrek, speedway, airfields
lion: aksai, dumpster, mixture, interpolation, sabor, gametophyte, rale, prokaryotes, aphasia, krishnamurti
africa: bombay, jacques, virginian, caryophyllales, electricity, wendish, warlike, inactivation, late, through
musician: lothal, orsay, exponent, awami, coherentism, vassalage, vestibule, felicity, antonia, hughes
dance: akhmatova, monty, chunk, bofors, emitter, nephew, desqview, quaternary, recombination, strasser




2501it [05:55,  6.19it/s]

| epoch   1 |  2500/32580 batches | loss    2.526 
money: coyotes, flanking, managed, furtado, damian, infogrames, larry, airfields, reyna, startrek
lion: aksai, dumpster, mixture, sabor, interpolation, gametophyte, prokaryotes, krishnamurti, rale, aphasia
africa: also, an, through, bombay, jacques, which, virginian, time, people, john
musician: lothal, orsay, exponent, awami, coherentism, vestibule, vassalage, felicity, hughes, antonia
dance: akhmatova, monty, bofors, chunk, emitter, nephew, desqview, quaternary, recombination, rearwards




3001it [07:05,  6.32it/s]

| epoch   1 |  3000/32580 batches | loss    2.439 
money: coyotes, flanking, managed, furtado, damian, their, airfields, larry, infogrames, speedway
lion: aksai, dumpster, mixture, sabor, interpolation, gametophyte, prokaryotes, krishnamurti, rale, spun
africa: also, which, one, used, people, an, two, through, time, it
musician: lothal, orsay, exponent, vestibule, awami, coherentism, vassalage, hughes, felicity, angled
dance: akhmatova, monty, bofors, chunk, nephew, emitter, quaternary, desqview, recombination, westenra




3501it [08:16,  6.28it/s]

| epoch   1 |  3500/32580 batches | loss    2.342 
money: coyotes, managed, flanking, furtado, damian, their, larry, speedway, airfields, infogrames
lion: aksai, dumpster, mixture, sabor, prokaryotes, interpolation, gametophyte, krishnamurti, spun, disciplining
africa: also, an, people, used, one, which, time, seven, that, john
musician: lothal, orsay, vestibule, exponent, awami, coherentism, vassalage, hughes, rest, make
dance: akhmatova, monty, bofors, chunk, nephew, emitter, desqview, quaternary, range, recombination




4001it [09:26,  6.23it/s]

| epoch   1 |  4000/32580 batches | loss    2.305 
money: coyotes, their, managed, flanking, damian, furtado, larry, airfields, speedway, an
lion: aksai, dumpster, mixture, sabor, gametophyte, prokaryotes, interpolation, krishnamurti, aphasia, disciplining
africa: also, used, time, which, an, people, one, seven, five, john
musician: lothal, orsay, vestibule, exponent, make, hughes, like, rest, coherentism, awami
dance: akhmatova, bofors, monty, chunk, nephew, emitter, quaternary, range, desqview, recombination




4501it [10:36,  6.26it/s]

| epoch   1 |  4500/32580 batches | loss    2.220 
money: their, coyotes, managed, an, flanking, sea, damian, larry, other, furtado
lion: aksai, dumpster, mixture, sabor, gametophyte, interpolation, prokaryotes, spun, krishnamurti, aphasia
africa: also, which, time, used, an, people, one, through, five, two
musician: lothal, orsay, like, vestibule, make, war, hughes, exponent, rest, removal
dance: akhmatova, range, bofors, chunk, nephew, emitter, names, monty, strasser, rearwards




5001it [11:46,  6.26it/s]

| epoch   1 |  5000/32580 batches | loss    2.113 
money: their, an, managed, sea, only, coyotes, other, many, flanking, known
lion: aksai, dumpster, mixture, sabor, gametophyte, interpolation, spun, prokaryotes, disciplining, krishnamurti
africa: also, used, time, which, people, three, two, seven, four, five
musician: lothal, orsay, war, country, make, rest, vestibule, group, like, hughes
dance: range, akhmatova, names, nephew, bofors, emitter, chunk, programming, rearwards, among




5501it [12:57,  6.31it/s]

| epoch   1 |  5500/32580 batches | loss    2.080 
money: their, an, other, sea, only, many, managed, known, there, it
lion: aksai, dumpster, mixture, gametophyte, sabor, spun, interpolation, prokaryotes, disciplining, discussed
africa: also, used, time, people, which, five, three, four, seven, one
musician: war, lothal, like, group, north, make, country, people, orsay, south
dance: range, names, nephew, akhmatova, among, bofors, others, emitter, hundreds, chunk




6001it [14:07,  6.28it/s]

| epoch   1 |  6000/32580 batches | loss    2.053 
money: their, only, an, many, there, other, known, it, sea, managed
lion: aksai, dumpster, mixture, gametophyte, sabor, prokaryotes, spun, interpolation, disciplining, discussed
africa: also, used, time, people, which, four, five, two, three, one
musician: war, group, like, lothal, make, people, north, country, south, orsay
dance: range, names, others, among, much, hundreds, nephew, other, bofors, akhmatova




6501it [15:18,  6.16it/s]

| epoch   1 |  6500/32580 batches | loss    1.983 
money: their, many, only, there, known, other, all, which, it, usually
lion: aksai, mixture, dumpster, gametophyte, sabor, prokaryotes, spun, discussed, disciplining, krishnamurti
africa: time, used, also, people, four, which, two, often, three, through
musician: war, north, group, like, make, south, country, people, lothal, league
dance: range, among, others, names, hundreds, much, nephew, other, british, bofors




7001it [16:29,  6.22it/s]

| epoch   1 |  7000/32580 batches | loss    2.010 
money: there, their, only, many, known, it, all, which, usually, most
lion: aksai, dumpster, mixture, spun, gametophyte, prokaryotes, discussed, disciplining, sabor, krishnamurti
africa: time, people, used, also, four, seven, often, five, three, one
musician: north, group, war, like, country, south, make, people, lothal, rest
dance: range, among, others, much, names, other, hundreds, british, line, career




7501it [17:41,  6.21it/s]

| epoch   1 |  7500/32580 batches | loss    1.959 
money: only, there, known, their, many, it, then, usually, which, all
lion: aksai, mixture, dumpster, spun, gametophyte, discussed, prokaryotes, disciplining, met, sabor
africa: people, four, time, used, often, seven, one, also, five, three
musician: group, war, north, like, country, south, make, rest, people, eastern
dance: range, others, among, other, names, much, hundreds, line, british, career




8001it [18:53,  6.15it/s]

| epoch   1 |  8000/32580 batches | loss    1.891 
money: there, only, known, it, many, usually, use, all, then, their
lion: aksai, mixture, dumpster, spun, discussed, met, gametophyte, prokaryotes, disciplining, sabor
africa: time, people, four, used, seven, three, often, nine, also, one
musician: group, war, north, like, south, country, culture, rest, people, eastern
dance: range, among, others, other, names, much, british, hundreds, same, line




8501it [20:05,  6.16it/s]

| epoch   1 |  8500/32580 batches | loss    1.896 
money: only, there, known, use, all, then, usually, it, many, well
lion: aksai, mixture, dumpster, spun, discussed, gametophyte, met, prokaryotes, disciplining, sabor
africa: time, people, used, four, seven, nine, three, often, five, which
musician: group, north, war, south, like, country, eastern, culture, rest, based
dance: range, other, others, among, names, much, british, hundreds, same, terms




9001it [21:17,  6.19it/s]

| epoch   1 |  9000/32580 batches | loss    1.884 
money: only, there, use, all, known, then, usually, well, god, death
lion: aksai, mixture, dumpster, discussed, spun, met, gametophyte, prokaryotes, disciplining, sabor
africa: time, people, four, seven, used, nine, often, three, five, history
musician: group, north, war, south, culture, country, eastern, like, rest, based
dance: range, other, others, names, among, much, hundreds, british, terms, latter




9501it [22:28,  6.29it/s]

| epoch   1 |  9500/32580 batches | loss    1.870 
money: only, there, known, use, usually, all, well, death, god, people
lion: aksai, mixture, dumpster, discussed, met, spun, gametophyte, disciplining, prokaryotes, sabor
africa: time, people, used, four, seven, nine, five, three, which, use
musician: group, north, war, south, culture, eastern, country, rest, led, like
dance: range, other, others, names, terms, among, hundreds, british, space, much




10002it [23:39,  6.49it/s]

| epoch   1 | 10000/32580 batches | loss    1.866 
money: only, use, known, there, usually, death, then, well, god, all
lion: aksai, mixture, dumpster, met, discussed, gametophyte, spun, disciplining, prokaryotes, sabor
africa: four, seven, time, used, people, five, nine, three, history, university
musician: group, north, war, culture, eastern, south, country, rest, led, league
dance: range, others, other, names, terms, british, space, among, hundreds, version




10501it [24:49,  6.23it/s]

| epoch   1 | 10500/32580 batches | loss    1.858 
money: known, use, death, usually, only, god, support, there, well, all
lion: aksai, mixture, met, dumpster, discussed, spun, gametophyte, disciplining, prokaryotes, czech
africa: four, seven, time, nine, five, used, history, people, university, three
musician: group, north, war, eastern, culture, south, rest, country, led, like
dance: range, others, other, names, terms, british, space, hundreds, version, among




11001it [26:01,  6.21it/s]

| epoch   1 | 11000/32580 batches | loss    1.840 
money: known, death, use, usually, god, then, well, all, people, support
lion: mixture, aksai, met, dumpster, discussed, spun, gametophyte, disciplining, prokaryotes, sabor
africa: four, seven, used, nine, history, five, time, university, north, people
musician: group, north, eastern, war, culture, south, rest, country, led, like
dance: range, others, names, other, terms, space, british, end, among, hundreds




11501it [27:12,  6.28it/s]

| epoch   1 | 11500/32580 batches | loss    1.776 
money: use, death, known, god, support, usually, people, well, list, then
lion: aksai, mixture, met, discussed, dumpster, spun, gametophyte, prokaryotes, disciplining, czech
africa: four, seven, nine, five, used, america, time, history, three, north
musician: north, group, eastern, culture, war, rest, south, country, league, led
dance: range, others, names, other, terms, space, end, hundreds, british, version




12001it [28:23,  6.18it/s]

| epoch   1 | 12000/32580 batches | loss    1.767 
money: use, death, god, known, support, well, list, only, people, then
lion: mixture, aksai, met, discussed, dumpster, spun, prokaryotes, disciplining, gametophyte, czech
africa: four, seven, america, history, nine, used, five, time, north, three
musician: north, culture, eastern, group, rest, war, south, led, country, league
dance: range, others, other, end, names, british, terms, list, line, among




12501it [29:33,  6.24it/s]

| epoch   1 | 12500/32580 batches | loss    1.722 
money: use, death, god, support, known, well, list, people, work, example
lion: mixture, aksai, met, discussed, dumpster, spun, prokaryotes, disciplining, gametophyte, sabor
africa: seven, america, four, five, nine, history, used, time, north, three
musician: group, north, culture, war, eastern, country, rest, south, led, people
dance: range, others, other, names, end, terms, line, list, british, version




13001it [30:44,  6.16it/s]

| epoch   1 | 13000/32580 batches | loss    1.751 
money: use, death, god, support, known, usually, list, well, work, then
lion: mixture, aksai, met, discussed, dumpster, spun, disciplining, prokaryotes, gametophyte, included
africa: america, seven, five, four, north, history, nine, three, area, end
musician: north, group, war, culture, eastern, south, country, rest, league, led
dance: range, others, other, names, end, terms, line, list, space, united




13501it [31:55,  6.13it/s]

| epoch   1 | 13500/32580 batches | loss    1.713 
money: use, death, support, god, known, list, usually, work, example, well
lion: mixture, aksai, met, discussed, dumpster, spun, prokaryotes, abuse, disciplining, gametophyte
africa: america, seven, five, north, history, end, nine, area, eight, late
musician: north, group, war, culture, south, eastern, country, led, people, league
dance: range, other, others, end, terms, list, names, line, united, last




14001it [33:06,  6.20it/s]

| epoch   1 | 14000/32580 batches | loss    1.725 
money: use, support, death, god, known, work, list, well, then, people
lion: mixture, aksai, met, discussed, dumpster, abuse, spun, included, disciplining, prokaryotes
africa: america, seven, five, north, history, nine, end, part, late, eight
musician: north, group, war, culture, eastern, country, south, led, people, league
dance: range, other, others, list, united, end, terms, british, last, line




14501it [34:18,  6.09it/s]

| epoch   1 | 14500/32580 batches | loss    1.644 
money: support, use, death, work, god, list, people, example, known, usually
lion: mixture, aksai, met, discussed, dumpster, abuse, included, spun, disciplining, czech
africa: america, seven, north, history, five, end, nine, late, part, eight
musician: north, group, eastern, south, culture, war, country, led, people, rest
dance: range, other, united, end, others, list, terms, last, same, use




15001it [35:29,  6.23it/s]

| epoch   1 | 15000/32580 batches | loss    1.684 
money: support, use, death, god, list, work, people, example, known, name
lion: mixture, aksai, met, discussed, abuse, dumpster, included, introduced, spun, czech
africa: america, five, seven, north, end, history, nine, part, late, four
musician: north, group, war, south, eastern, culture, country, led, rest, league
dance: range, united, other, others, list, end, terms, british, same, line




15501it [36:40,  6.09it/s]

| epoch   1 | 15500/32580 batches | loss    1.682 
money: support, use, death, god, work, list, example, people, known, name
lion: mixture, met, aksai, discussed, included, abuse, saints, introduced, dumpster, czech
africa: america, seven, end, north, history, five, nine, part, late, england
musician: north, group, war, south, culture, eastern, country, led, rest, league
dance: range, united, list, terms, others, other, end, british, same, those




16001it [37:51,  6.22it/s]

| epoch   1 | 16000/32580 batches | loss    1.650 
money: support, use, death, work, god, name, example, list, well, known
lion: mixture, met, aksai, discussed, included, abuse, saints, quantum, introduced, dumpster
africa: america, end, north, history, seven, five, england, late, eight, nine
musician: north, group, culture, south, eastern, war, country, led, rest, league
dance: range, list, united, terms, other, end, others, same, british, those




16501it [39:02,  6.16it/s]

| epoch   1 | 16500/32580 batches | loss    1.629 
money: support, use, death, work, god, name, example, well, list, known
lion: mixture, met, aksai, discussed, included, introduced, saints, abuse, quantum, organisation
africa: america, end, north, history, seven, five, nine, late, england, part
musician: north, group, south, culture, war, eastern, country, rest, led, england
dance: range, list, end, united, terms, others, other, british, same, those




17001it [40:13,  6.13it/s]

| epoch   1 | 17000/32580 batches | loss    1.587 
money: support, use, death, work, god, name, example, production, well, known
lion: mixture, met, discussed, aksai, included, saints, introduced, abuse, quantum, czech
africa: america, end, north, seven, history, five, england, late, part, u
musician: north, group, south, war, culture, country, eastern, england, led, europe
dance: range, list, end, terms, united, others, other, same, last, those




17501it [41:24,  6.06it/s]

| epoch   1 | 17500/32580 batches | loss    1.685 
money: support, use, death, work, god, name, example, production, people, usually
lion: mixture, met, discussed, aksai, included, saints, introduced, abuse, czech, organisation
africa: america, end, history, north, seven, england, five, part, nine, late
musician: north, group, south, war, culture, country, england, eastern, led, actress
dance: range, terms, list, end, united, others, same, other, last, country




18001it [42:35,  6.26it/s]

| epoch   1 | 18000/32580 batches | loss    1.623 
money: support, use, work, death, name, god, example, view, production, well
lion: mixture, met, discussed, included, aksai, introduced, saints, abuse, czech, organisation
africa: america, end, north, history, five, u, seven, part, late, nine
musician: north, group, south, culture, country, england, war, eastern, actress, europe
dance: range, terms, list, united, other, end, others, same, last, country




18501it [43:47,  6.15it/s]

| epoch   1 | 18500/32580 batches | loss    1.524 
money: support, use, work, death, god, example, name, view, production, list
lion: mixture, met, discussed, introduced, aksai, saints, included, abuse, czech, question
africa: america, end, north, history, nine, u, part, five, europe, late
musician: north, group, culture, south, england, eastern, war, country, actress, europe
dance: range, terms, list, united, end, other, others, same, country, those




19001it [45:00,  6.00it/s]

| epoch   1 | 19000/32580 batches | loss    1.608 
money: support, use, work, death, example, view, name, production, god, list
lion: mixture, met, discussed, introduced, included, saints, aksai, abuse, question, czech
africa: america, end, north, history, seven, five, nine, europe, part, u
musician: north, group, culture, south, england, eastern, war, actress, country, four
dance: range, terms, list, united, other, end, others, same, last, country




19501it [46:13,  6.11it/s]

| epoch   1 | 19500/32580 batches | loss    1.618 
money: support, use, work, death, example, production, view, name, list, nine
lion: mixture, met, discussed, introduced, included, saints, abuse, aksai, question, quantum
africa: america, end, north, history, nine, europe, late, china, eight, seven
musician: north, culture, south, england, war, eastern, group, four, actress, country
dance: range, terms, list, united, other, end, others, same, country, nine




20001it [47:24,  6.24it/s]

| epoch   1 | 20000/32580 batches | loss    1.610 
money: support, use, work, death, view, example, production, name, list, nine
lion: met, mixture, discussed, introduced, included, abuse, saints, aksai, question, quantum
africa: america, end, north, nine, seven, eight, late, europe, china, three
musician: north, culture, south, england, four, eastern, eight, actress, war, group
dance: range, terms, united, list, other, end, country, others, use, york




20501it [48:36,  6.02it/s]

| epoch   1 | 20500/32580 batches | loss    1.524 
money: support, use, work, view, example, death, name, production, nine, list
lion: met, mixture, discussed, introduced, included, abuse, saints, question, aksai, quantum
africa: america, end, north, europe, late, nine, china, seven, england, eight
musician: north, culture, south, england, eastern, actress, eight, rest, four, group
dance: range, terms, list, other, united, end, use, country, others, york




21001it [49:48,  6.01it/s]

| epoch   1 | 21000/32580 batches | loss    1.564 
money: support, work, use, view, death, god, example, name, list, production
lion: met, mixture, discussed, introduced, included, abuse, saints, question, quantum, organisation
africa: america, end, north, europe, late, england, china, nine, eight, france
musician: north, culture, england, eastern, south, rest, actress, war, group, eight
dance: terms, range, list, other, united, end, country, use, others, york




21501it [51:01,  6.30it/s]

| epoch   1 | 21500/32580 batches | loss    1.531 
money: support, work, use, view, death, god, example, production, list, name
lion: met, mixture, discussed, introduced, included, abuse, saints, question, pope, quantum
africa: america, end, north, europe, england, china, nine, late, france, part
musician: north, england, group, culture, eastern, rest, south, eight, actress, fact
dance: terms, range, list, united, other, end, country, use, nine, york




22001it [52:10,  6.22it/s]

| epoch   1 | 22000/32580 batches | loss    1.564 
money: support, work, use, death, view, god, example, production, list, nine
lion: met, mixture, discussed, introduced, included, abuse, saints, question, pope, show
africa: america, end, north, europe, england, china, nine, late, france, part
musician: north, england, rest, south, eight, group, culture, eastern, actress, fact
dance: terms, range, list, united, other, country, end, nine, york, use




22502it [53:21,  6.56it/s]

| epoch   1 | 22500/32580 batches | loss    1.579 
money: support, work, use, death, view, example, god, production, name, usually
lion: met, mixture, discussed, introduced, included, abuse, saints, pope, question, show
africa: america, end, europe, north, england, china, late, nine, france, history
musician: north, england, group, rest, south, culture, eastern, eight, actress, fact
dance: terms, range, list, united, other, country, end, york, same, nine




23002it [54:31,  6.55it/s]

| epoch   1 | 23000/32580 batches | loss    1.490 
money: support, work, use, view, god, death, example, production, name, nine
lion: met, mixture, discussed, included, introduced, abuse, saints, question, pope, organisation
africa: america, end, europe, north, england, china, nine, late, india, france
musician: north, england, rest, eastern, south, actress, eight, culture, group, fact
dance: range, terms, list, other, united, country, end, york, same, nine




23501it [55:41,  6.31it/s]

| epoch   1 | 23500/32580 batches | loss    1.517 
money: support, work, view, use, death, god, example, production, nine, name
lion: met, mixture, discussed, abuse, introduced, included, saints, question, pope, organisation
africa: america, end, europe, north, england, nine, seven, five, china, late
musician: north, rest, england, eight, four, group, eastern, actress, culture, south
dance: range, terms, united, other, list, country, end, york, irish, nine




24001it [56:52,  6.14it/s]

| epoch   1 | 24000/32580 batches | loss    1.525 
money: support, work, use, death, god, example, view, production, name, nine
lion: met, mixture, abuse, discussed, introduced, included, saints, question, pope, society
africa: america, end, north, europe, nine, england, china, india, late, u
musician: north, england, rest, eight, group, eastern, actress, south, fact, culture
dance: terms, range, other, list, united, end, country, york, irish, u




24502it [58:04,  6.60it/s]

| epoch   1 | 24500/32580 batches | loss    1.374 
money: support, work, example, death, use, view, god, production, name, end
lion: met, mixture, abuse, discussed, included, introduced, saints, question, organisation, society
africa: america, end, europe, north, nine, england, china, india, late, eight
musician: actress, north, actor, england, rest, fact, eight, four, group, south
dance: range, terms, other, united, list, end, irish, country, u, york




25001it [59:16,  6.09it/s]

| epoch   1 | 25000/32580 batches | loss    1.307 
money: support, work, death, example, use, god, view, production, nine, end
lion: met, mixture, abuse, introduced, included, discussed, saints, question, organisation, society
africa: america, end, europe, north, nine, england, china, india, late, france
musician: actress, actor, four, north, singer, one, england, writer, u, three
dance: range, terms, other, united, list, country, end, irish, actor, york




25502it [1:00:28,  6.52it/s]

| epoch   1 | 25500/32580 batches | loss    1.413 
money: support, work, death, example, nine, god, use, production, view, end
lion: met, mixture, abuse, introduced, included, discussed, saints, society, question, organisation
africa: america, end, north, europe, nine, india, china, england, eight, france
musician: actress, actor, singer, four, writer, nine, composer, north, six, england
dance: range, terms, other, united, list, country, actor, end, u, york




26002it [1:01:38,  6.65it/s]

| epoch   1 | 26000/32580 batches | loss    1.470 
money: support, work, death, example, use, production, god, view, nine, end
lion: met, mixture, abuse, introduced, included, saints, discussed, society, question, pope
africa: america, end, north, europe, nine, india, china, england, france, eight
musician: actress, actor, singer, writer, nine, composer, seven, four, england, north
dance: range, terms, united, other, list, country, actor, end, u, york




26502it [1:02:48,  6.60it/s]

| epoch   1 | 26500/32580 batches | loss    1.471 
money: support, work, death, example, use, nine, god, end, view, production
lion: met, mixture, abuse, introduced, included, discussed, saints, question, society, pope
africa: america, end, north, nine, europe, india, china, england, france, united
musician: actress, actor, singer, writer, nine, composer, north, four, england, six
dance: range, terms, united, list, other, country, end, actor, york, version




27002it [1:03:58,  6.67it/s]

| epoch   1 | 27000/32580 batches | loss    1.530 
money: support, work, death, use, example, god, production, end, view, nine
lion: met, mixture, abuse, introduced, included, discussed, saints, question, society, pope
africa: america, end, north, europe, india, nine, china, england, france, result
musician: actress, actor, singer, four, writer, composer, nine, north, eight, england
dance: terms, range, united, other, list, country, end, york, zero, actor




27501it [1:05:08,  6.36it/s]

| epoch   1 | 27500/32580 batches | loss    1.453 
money: support, work, god, death, example, use, production, end, view, much
lion: met, mixture, introduced, abuse, included, discussed, question, saints, society, pope
africa: america, end, north, europe, india, china, nine, england, result, united
musician: actress, actor, singer, writer, four, composer, north, nine, eight, england
dance: terms, range, united, other, list, country, zero, end, actor, york




28001it [1:06:18,  6.35it/s]

| epoch   1 | 28000/32580 batches | loss    1.457 
money: support, work, use, god, death, example, production, view, end, nine
lion: met, mixture, introduced, abuse, included, discussed, question, saints, pope, society
africa: america, end, north, europe, nine, india, china, england, result, united
musician: actress, actor, singer, nine, writer, composer, north, rest, eight, four
dance: terms, united, range, other, list, country, actor, end, york, nine




28451it [1:07:21,  7.09it/s]