In [33]:
import sys
sys.path.append("../src/")
import os
import time
import torch
import numpy as np
import pickle as pkl
import torch.nn as nn
import torch.optim as optim
from vocabulary import Vocabulary


# Debug switch: when True, the training loop below stops early, once the step
# counter exceeds 2000 (see the `DEBUG and 2000 < step` check).
DEBUG = False


class CBOWModel(nn.Module):
    """Continuous bag-of-words model: summed context embeddings -> vocabulary logits.

    Args:
        vocab_size: Number of rows in the embedding table and number of
            output logits produced per input row.
        embedding_dim: Width of each embedding vector (default 32).
    """

    def __init__(self, vocab_size, embedding_dim=32):
        super().__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.out_layer = nn.Linear(embedding_dim, vocab_size)

    def forward(self, inputs):
        """Compute unnormalized vocabulary scores for each row of ``inputs``.

        Args:
            inputs: Integer index tensor looked up in the embedding table.
                The looked-up vectors are summed over dimension 1
                (presumably the context-position axis of a
                (batch, context_len) tensor -- confirm against the caller).

        Returns:
            The output of the linear layer applied to the summed embeddings;
            its last dimension has size ``vocab_size``.
        """
        # Invoke the submodules via __call__ rather than .forward() so that
        # any registered forward/pre-forward hooks are executed.
        projections = self.embeddings(inputs).sum(dim=1)
        return self.out_layer(projections)
      

def get_next_batch(
    contexts,
    window_size,
    batch_size,
    epochs_count,
):
    """Yield shuffled mini-batches of (contexts, central words) as index tensors.

    This is a generator, so all validation below happens on the first
    ``next()`` call, not when the function is called.

    Args:
        contexts: Iterable of ``(central_word, context)`` pairs. Each batch is
            passed to ``torch.LongTensor``, so the entries must be integers /
            equal-length integer sequences.
        window_size: Only used to convert ``batch_size`` into a number of
            samples: every sample is counted as ``2 * window_size`` units.
        batch_size: Batch size in those units; must be a multiple of
            ``2 * window_size``. Each yielded batch holds up to
            ``batch_size // (2 * window_size)`` samples (the last batch of an
            epoch may be smaller).
        epochs_count: Number of passes over the data. The sample order is
            reshuffled with ``np.random.shuffle`` at the start of every pass.

    Yields:
        Tuple ``(batch_contexts, batch_centrals)`` of ``torch.LongTensor``.
        Nothing is yielded when ``contexts`` is empty.

    Raises:
        AssertionError: If ``batch_size`` is not a multiple of
            ``2 * window_size``.
        ValueError: If the resulting number of samples per batch is not
            positive (e.g. ``batch_size=0``); previously this looped forever.
    """
    pairs_per_sample = window_size * 2
    assert batch_size % pairs_per_sample == 0
    samples_per_batch = batch_size // pairs_per_sample
    if samples_per_batch <= 0:
        # A non-positive stride would never advance through the data.
        raise ValueError(
            "batch_size // (2 * window_size) must be positive, got {}".format(
                samples_per_batch
            )
        )

    # Transpose the list of pairs into two parallel tuples. An empty input
    # transposes to an empty tuple, in which case there is nothing to yield.
    unzipped = tuple(zip(*contexts))
    if not unzipped:
        return
    central_words, context_windows = unzipped
    sample_count = len(context_windows)

    for _ in range(epochs_count):
        indices = np.arange(sample_count)
        np.random.shuffle(indices)
        for batch_begin in range(0, sample_count, samples_per_batch):
            batch_indices = indices[batch_begin: batch_begin + samples_per_batch]
            yield (
                torch.LongTensor([context_windows[i] for i in batch_indices]),
                torch.LongTensor([central_words[i] for i in batch_indices]),
            )



In [25]:
# Read the pickled bundle stored at ../data/prepared.pkl.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file --
# only load files from a trusted source.
with open("../data/prepared.pkl", "rb") as fp:
    prepared = pkl.load(fp)

# Pull the four entries used by the rest of the notebook out of the bundle,
# in the same order as the target names on the left.
vocabulary, texts, contexts, test_texts = (
    prepared[key]
    for key in ("vocabulary", "texts", "contexts", "test_texts")
)

# Drop the container name now that its entries are bound individually.
del prepared

In [35]:
# Build the model and move it to the GPU when one is available.
model = CBOWModel(vocabulary.size, 32)
device = torch.device(
    "cuda" if torch.cuda.is_available() else "cpu"
)
model = model.to(device)

# Reporting state: the running loss is printed every `loss_every_nsteps` steps.
loss_every_nsteps = 1000
total_loss = 0
steps_since_report = 0  # number of losses accumulated in total_loss
start_time = time.time()

optimizer = optim.Adam(model.parameters(), lr=0.01)
loss_function = nn.CrossEntropyLoss().to(device)

for step, (batch_contexts, batch_centrals) in enumerate(
    get_next_batch(contexts, window_size=2, batch_size=256, epochs_count=10)
):
    # The batches are created on the CPU; move them to the model's device,
    # otherwise the forward pass fails with a device mismatch under CUDA.
    batch_contexts = batch_contexts.to(device)
    batch_centrals = batch_centrals.to(device)

    logits = model(batch_contexts)  # forward pass
    loss = loss_function(logits, batch_centrals)  # compute the loss
    loss.backward()  # compute gradients dL/dw
    optimizer.step()  # parameter update (Adam in this case)
    optimizer.zero_grad()  # reset gradients so the next iteration starts clean

    total_loss += loss.item()
    steps_since_report += 1
    if step != 0 and step % loss_every_nsteps == 0:
        # Divide by the number of losses actually accumulated: the first
        # window covers steps 0..1000, i.e. 1001 losses, not 1000.
        print("Step = {}, Avg Loss = {:.4f}, Time = {:.2f}s".format(step, total_loss / steps_since_report, time.time() - start_time))
        total_loss = 0
        steps_since_report = 0
        start_time = time.time()
    if DEBUG and 2000 < step:
        # Debug mode: stop shortly after step 2000.
        break

Step = 1000, Avg Loss = 9.1579, Time = 15.05s
Step = 2000, Avg Loss = 8.4321, Time = 8.95s
Step = 3000, Avg Loss = 8.2219, Time = 9.20s
Step = 4000, Avg Loss = 8.1063, Time = 9.31s
Step = 5000, Avg Loss = 7.9843, Time = 9.33s
Step = 6000, Avg Loss = 7.8986, Time = 9.35s
Step = 7000, Avg Loss = 7.8174, Time = 9.42s
Step = 8000, Avg Loss = 7.7393, Time = 9.37s
Step = 9000, Avg Loss = 7.7229, Time = 9.37s
Step = 10000, Avg Loss = 7.6799, Time = 9.43s
Step = 11000, Avg Loss = 7.6142, Time = 9.35s
Step = 12000, Avg Loss = 7.5700, Time = 9.40s
Step = 13000, Avg Loss = 7.5494, Time = 9.43s
Step = 14000, Avg Loss = 7.4922, Time = 9.41s
Step = 15000, Avg Loss = 7.4573, Time = 9.42s
Step = 16000, Avg Loss = 7.4344, Time = 9.48s
Step = 17000, Avg Loss = 7.3823, Time = 9.43s
Step = 18000, Avg Loss = 7.3985, Time = 9.39s
Step = 19000, Avg Loss = 7.3637, Time = 9.41s
Step = 20000, Avg Loss = 7.3068, Time = 9.46s
Step = 21000, Avg Loss = 7.3155, Time = 9.44s
Step = 22000, Avg Loss = 7.2831, Time = 9.

KeyboardInterrupt: 

In [23]:
# NOTE(review): `lit` is not defined anywhere in the visible notebook, so this
# cell raises NameError (see its output). It looks like leftover scratch input
# -- confirm and remove the cell.
lit

NameError: name 'lit' is not defined