## CBOW - Word2Vec Implementation Pytorch

In [1]:
import re
import nltk
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from nltk.corpus import webtext
from keras.preprocessing import sequence
from keras.utils import np_utils
from keras.preprocessing import text
from keras.utils import np_utils
from keras.preprocessing import sequence
import matplotlib.pyplot as plt

pd.options.display.max_colwidth = 200
%matplotlib inline

Using TensorFlow backend.


In [2]:
import nltk
nltk.download('stopwords')
nltk.download('webtext')
from nltk.corpus import brown

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Sajid\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package webtext to
[nltk_data]     C:\Users\Sajid\AppData\Roaming\nltk_data...
[nltk_data]   Package webtext is already up-to-date!


## Pre-Processing text Code

In [12]:
wordpt = nltk.WordPunctTokenizer()
stop_words = nltk.corpus.stopwords.words('english')

def normalize_document(doc):
    # lower case and remove special characters\whitespaces
    doc = re.sub(r'[^a-zA-Z\s]', '', doc, re.I|re.A)
    doc = doc.lower()
    doc = doc.strip()
    # tokenize document
    tokens = wordpt.tokenize(doc)
    # filter stopwords out of document
    filtered_tokens = [token for token in tokens if token not in stop_words]
    # re-create document from filtered tokens
    doc = ' '.join(filtered_tokens)
    return doc

normalize_corpus = np.vectorize(normalize_document)

In [13]:
corpus = ['The sky is blue and beautiful.',
          'Love this blue and beautiful sky!',
          'The quick brown fox jumps over the lazy dog.',
          "A king's breakfast has sausages, ham, bacon, eggs, toast and beans",
          'I love green eggs, ham, sausages and bacon!',
          'The brown fox is quick and the blue dog is lazy!',
          'The sky is very blue and the sky is very beautiful today',
          'The dog is lazy but the brown fox is quick!'    
]
labels = ['weather', 'weather', 'animals', 'food', 'food', 'animals', 'weather', 'animals']

corpus = np.array(corpus)
corpus_df = pd.DataFrame({'Document': corpus, 
                          'Category': labels})
corpus_df = corpus_df[['Document', 'Category']]
corpus_df

Unnamed: 0,Document,Category
0,The sky is blue and beautiful.,weather
1,Love this blue and beautiful sky!,weather
2,The quick brown fox jumps over the lazy dog.,animals
3,"A king's breakfast has sausages, ham, bacon, eggs, toast and beans",food
4,"I love green eggs, ham, sausages and bacon!",food
5,The brown fox is quick and the blue dog is lazy!,animals
6,The sky is very blue and the sky is very beautiful today,weather
7,The dog is lazy but the brown fox is quick!,animals


In [14]:
# build a sample vocab
vocab = []
print(webtext.fileids())
print(len(webtext.raw('firefox.txt'))) 
for fileid in webtext.fileids():
    vocab.append(webtext.raw('firefox.txt'))

    #print(brown.raw('cb01').strip()[:1000])  

['firefox.txt', 'grail.txt', 'overheard.txt', 'pirates.txt', 'singles.txt', 'wine.txt']
564601


### text preprocessing (Remove tags e.g HTML,Remove special characters, Remove stopwords) === Clean data

In [15]:
tokenizer = text.Tokenizer()
tokenizer.fit_on_texts(corpus)
word2id = tokenizer.word_index

word2id['PAD'] = 0
id2word = {v:k for k, v in word2id.items()}
wids = [[word2id[w] for w in text.text_to_word_sequence(doc)] for doc in corpus]

vocab_size = len(word2id)
embed_size = 100
window_size = 2

print('Vocabulary Size:', vocab_size)
print('Vocabulary Sample:', list(word2id.items())[:10])

Vocabulary Size: 31
Vocabulary Sample: [('the', 1), ('is', 2), ('and', 3), ('sky', 4), ('blue', 5), ('beautiful', 6), ('quick', 7), ('brown', 8), ('fox', 9), ('lazy', 10)]


### [context_words, target_word] pairs

In [16]:
def generate_context_word_pairs(corpus, window_size, vocab_size):
    X = []
    Y = []
    context_length = window_size*2
    for words in wids:
        sentence_length = len(words)
        for index, word in enumerate(words):           
            start = index - window_size
            end = index + window_size + 1
            context = [words[i] for i in range(start, end)if 0 <= i < sentence_length and i != index]
            x = sequence.pad_sequences([context], maxlen=context_length)
            X.append(x)
            Y.append(word)
    return X,Y

## CBOW (Contineous bag of Words Model architecture)

In [17]:
import torch
import torch.nn as nn
import numpy as np

class CBOW(torch.nn.Module):

    def __init__(self, inp_size , vocab_size, embedding_dim=100):
        super(CBOW, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(embedding_dim, 100)
        self.activation_function1 = nn.ReLU()        
        self.linear2 = nn.Linear(100, vocab_size)
        self.activation_function2 = nn.LogSoftmax(dim = -1)
        
    def forward(self, inputs):
        embeds = sum(self.embeddings(torch.from_numpy(inputs).long().cuda())).view(1,-1)
        out = self.linear1(embeds)
        out = self.activation_function1(out)
        out = self.linear2(out)
        out = self.activation_function2(out)
        return out

    
model = CBOW(window_size*2,vocab_size).cuda()
loss_function = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

# Print model's state_dict
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

# Print optimizer's state_dict
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])

torch.save(model.state_dict(), "Cbow_Weights")

# model = TheModelClass(*args, **kwargs)
# model.load_state_dict(torch.load(PATH))
# model.eval()

Model's state_dict:
embeddings.weight 	 torch.Size([31, 100])
linear1.weight 	 torch.Size([100, 100])
linear1.bias 	 torch.Size([100])
linear2.weight 	 torch.Size([31, 100])
linear2.bias 	 torch.Size([31])
Optimizer's state_dict:
state 	 {}
param_groups 	 [{'lr': 0.001, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [2698020534024, 2698020535752, 2698020535320, 2698020533952, 2698020534096]}]


In [22]:
for epoch in range(1, 1000):
    loss = 0
    i = 0
    X,Y = generate_context_word_pairs(corpus=wids, window_size=window_size, vocab_size=vocab_size)
    for x, y in zip(X,Y):
        i += 1
        optimizer.zero_grad()
        log_probs = model(x[0])
        loss = loss_function(log_probs,torch.Tensor([y]).long().cuda())
        loss.backward()
        optimizer.step()
        loss += loss.data
    print('Epoch:', epoch, '\tLoss:', loss)

Epoch: 1 	Loss: tensor(4.0768, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 2 	Loss: tensor(3.9260, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 3 	Loss: tensor(3.7763, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 4 	Loss: tensor(3.6316, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 5 	Loss: tensor(3.4874, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 6 	Loss: tensor(3.3656, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 7 	Loss: tensor(3.2325, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 8 	Loss: tensor(3.1233, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 9 	Loss: tensor(3.0060, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 10 	Loss: tensor(2.8863, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 11 	Loss: tensor(2.7681, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 12 	Loss: tensor(2.6619, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 13 	Loss: tensor(2.5569, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 14 	Loss: tensor(2.4583, device='cuda:0', grad_fn=<Ad

Epoch: 113 	Loss: tensor(0.3797, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 114 	Loss: tensor(0.3743, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 115 	Loss: tensor(0.3716, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 116 	Loss: tensor(0.3680, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 117 	Loss: tensor(0.3652, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 118 	Loss: tensor(0.3605, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 119 	Loss: tensor(0.3582, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 120 	Loss: tensor(0.3555, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 121 	Loss: tensor(0.3512, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 122 	Loss: tensor(0.3485, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 123 	Loss: tensor(0.3449, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 124 	Loss: tensor(0.3423, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 125 	Loss: tensor(0.3381, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 126 	Loss: tensor(0.3358, devic

Epoch: 223 	Loss: tensor(0.1731, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 224 	Loss: tensor(0.1720, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 225 	Loss: tensor(0.1711, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 226 	Loss: tensor(0.1700, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 227 	Loss: tensor(0.1695, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 228 	Loss: tensor(0.1685, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 229 	Loss: tensor(0.1674, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 230 	Loss: tensor(0.1666, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 231 	Loss: tensor(0.1654, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 232 	Loss: tensor(0.1647, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 233 	Loss: tensor(0.1637, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 234 	Loss: tensor(0.1629, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 235 	Loss: tensor(0.1622, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 236 	Loss: tensor(0.1612, devic

Epoch: 333 	Loss: tensor(0.1081, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 334 	Loss: tensor(0.1075, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 335 	Loss: tensor(0.1072, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 336 	Loss: tensor(0.1067, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 337 	Loss: tensor(0.1064, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 338 	Loss: tensor(0.1060, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 339 	Loss: tensor(0.1058, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 340 	Loss: tensor(0.1055, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 341 	Loss: tensor(0.1050, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 342 	Loss: tensor(0.1046, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 343 	Loss: tensor(0.1042, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 344 	Loss: tensor(0.1040, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 345 	Loss: tensor(0.1036, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 346 	Loss: tensor(0.1033, devic

Epoch: 443 	Loss: tensor(0.0775, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 444 	Loss: tensor(0.0773, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 445 	Loss: tensor(0.0770, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 446 	Loss: tensor(0.0770, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 447 	Loss: tensor(0.0767, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 448 	Loss: tensor(0.0766, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 449 	Loss: tensor(0.0763, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 450 	Loss: tensor(0.0762, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 451 	Loss: tensor(0.0758, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 452 	Loss: tensor(0.0757, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 453 	Loss: tensor(0.0757, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 454 	Loss: tensor(0.0754, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 455 	Loss: tensor(0.0752, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 456 	Loss: tensor(0.0749, devic

Epoch: 553 	Loss: tensor(0.0605, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 554 	Loss: tensor(0.0603, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 555 	Loss: tensor(0.0602, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 556 	Loss: tensor(0.0600, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 557 	Loss: tensor(0.0599, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 558 	Loss: tensor(0.0598, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 559 	Loss: tensor(0.0597, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 560 	Loss: tensor(0.0596, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 561 	Loss: tensor(0.0595, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 562 	Loss: tensor(0.0593, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 563 	Loss: tensor(0.0593, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 564 	Loss: tensor(0.0591, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 565 	Loss: tensor(0.0591, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 566 	Loss: tensor(0.0588, devic

Epoch: 663 	Loss: tensor(0.0494, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 664 	Loss: tensor(0.0494, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 665 	Loss: tensor(0.0493, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 666 	Loss: tensor(0.0493, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 667 	Loss: tensor(0.0491, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 668 	Loss: tensor(0.0491, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 669 	Loss: tensor(0.0490, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 670 	Loss: tensor(0.0489, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 671 	Loss: tensor(0.0489, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 672 	Loss: tensor(0.0488, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 673 	Loss: tensor(0.0487, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 674 	Loss: tensor(0.0486, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 675 	Loss: tensor(0.0485, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 676 	Loss: tensor(0.0485, devic

Epoch: 773 	Loss: tensor(0.0421, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 774 	Loss: tensor(0.0420, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 775 	Loss: tensor(0.0420, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 776 	Loss: tensor(0.0420, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 777 	Loss: tensor(0.0420, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 778 	Loss: tensor(0.0419, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 779 	Loss: tensor(0.0419, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 780 	Loss: tensor(0.0419, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 781 	Loss: tensor(0.0418, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 782 	Loss: tensor(0.0418, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 783 	Loss: tensor(0.0417, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 784 	Loss: tensor(0.0417, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 785 	Loss: tensor(0.0416, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 786 	Loss: tensor(0.0416, devic

Epoch: 883 	Loss: tensor(0.0366, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 884 	Loss: tensor(0.0365, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 885 	Loss: tensor(0.0365, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 886 	Loss: tensor(0.0365, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 887 	Loss: tensor(0.0364, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 888 	Loss: tensor(0.0364, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 889 	Loss: tensor(0.0363, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 890 	Loss: tensor(0.0363, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 891 	Loss: tensor(0.0363, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 892 	Loss: tensor(0.0363, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 893 	Loss: tensor(0.0362, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 894 	Loss: tensor(0.0362, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 895 	Loss: tensor(0.0361, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 896 	Loss: tensor(0.0361, devic

Epoch: 993 	Loss: tensor(0.0326, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 994 	Loss: tensor(0.0325, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 995 	Loss: tensor(0.0325, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 996 	Loss: tensor(0.0325, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 997 	Loss: tensor(0.0324, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 998 	Loss: tensor(0.0324, device='cuda:0', grad_fn=<AddBackward0>)
Epoch: 999 	Loss: tensor(0.0324, device='cuda:0', grad_fn=<AddBackward0>)


In [23]:
weights = model.embeddings(torch.Tensor([list(range(0,vocab_size))]).long().cuda())

pd.DataFrame(weights.view(-1,100).tolist(), index=list(id2word.values())[0:]).head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
the,0.94906,0.601485,-0.046146,0.622524,-0.912017,-0.862392,0.208219,-2.355533,-0.207395,-0.069587,...,1.51568,0.225686,0.074628,1.436258,0.316213,-0.04405,-0.19256,-1.278589,-0.644435,0.524522
is,-0.204698,-0.714867,-0.3558,1.049916,-2.287774,-1.362642,-2.099994,-1.457708,-0.689214,-0.192812,...,0.914716,-1.746692,0.021791,-1.626508,0.589722,-0.222012,0.293204,0.805142,0.764535,-1.319099
and,0.915585,-0.601855,1.379885,0.30769,-0.102921,-0.692943,-1.256499,-0.317495,-0.090944,-0.349855,...,-0.950687,-1.24149,1.299908,0.771946,-0.955785,-1.169201,0.469003,1.084787,-0.859178,1.236295
sky,0.169044,1.471745,0.013567,1.063449,0.357744,-0.128788,-0.132404,-0.883995,-0.24392,-0.272949,...,-0.402419,-0.239137,0.4685,1.097794,0.945174,0.313995,0.457497,0.411889,-0.075854,-0.986178
blue,0.165463,-0.271816,-2.161096,-0.679418,-0.572815,1.365263,-0.07723,-1.54479,0.917948,0.061966,...,-0.81391,-1.615587,0.085289,-1.78051,-1.2239,0.445015,0.080041,-1.952795,-0.270981,0.730214
beautiful,-0.264317,-0.042613,0.752842,-1.20149,-0.908022,0.917983,1.014262,1.188829,0.028253,0.166835,...,0.274835,-0.640499,-1.518954,1.361498,1.370613,-0.984414,2.618299,0.159707,-0.472797,0.703599
quick,-0.546409,-0.488233,-0.856448,-1.27419,0.915061,-0.73846,-0.933244,1.239057,0.147252,0.33356,...,0.279685,-0.507048,0.570349,-0.287298,-1.963065,1.283551,0.488708,1.301658,-0.927166,0.627487
brown,1.420189,0.066746,-0.77758,0.414225,-1.025514,0.067859,1.813741,-0.85746,1.433506,1.313242,...,-0.817376,0.111046,1.186138,-0.941866,-1.030077,-1.703512,1.113617,-0.363929,-0.682402,0.105342
fox,-0.21295,0.734865,-0.325665,-1.008388,0.757536,0.231809,0.46231,1.629333,-2.066199,1.296143,...,-1.285876,0.069139,-0.854483,-0.238,1.25557,1.78858,-0.146465,-0.436783,-0.054201,-0.151487
lazy,1.875924,1.042332,-0.434358,-0.422643,-1.084296,-0.435054,0.322612,-0.822072,0.063755,-0.634266,...,0.388472,-0.534687,-0.499022,-0.331182,0.681713,-0.131035,-1.034492,-0.561525,1.541132,-1.750968


In [None]:
model = CBOW(window_size*2,vocab_size)
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

checkpoint = torch.load('Cbow_Weights')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

model.eval()
# - or -
model.train()

In [24]:
from sklearn.metrics.pairwise import euclidean_distances

weights = weights.view(-1,100)
distance_matrix = euclidean_distances(weights.detach().numpy())
similar_words = {search_term: [id2word[idx] for idx in distance_matrix[word2id[search_term]-1].argsort()[1:4]+1] 
                 for search_term in ['the', 'fox', 'beautiful','brown','lazy']}

similar_words

TypeError: can't convert CUDA tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.