In [60]:
torch.LongTensor([1,2,3]).unsqueeze(1)

tensor([[1],
        [2],
        [3]])

In [79]:
import sys
sys.path.append("../src/")
import os
import time
import json
import torch
import random
import datetime
import pickle as pkl
import torch.nn as nn
from types import NoneType
from itertools import cycle
import torch.optim as optim
from utils import get_next_batch
from vocabulary import Vocabulary
from pytorch_lightning import Trainer
from typing import Union, Mapping, Any
from pytorch_lightning import LightningModule
from pytorch_lightning.loggers import CSVLogger
from pytorch_lightning.callbacks import EarlyStopping
from torch.utils.data import Dataset, IterableDataset
from torch.utils.data import DataLoader, RandomSampler
from pytorch_lightning.callbacks import ModelCheckpoint


DEBUG = False
BATCH_SIZE = 256
EPOCHS = 10
# FNAME, _ = os.path.splitext(os.path.basename(__file__))

FNAME = "w06"

def log(msg: str):
    now = datetime.datetime.now()
    dttm = now.strftime(format="%Y-%m-%d %H:%M:%S.%f") 
    print(f"[{dttm}] {msg}")


def get_samples(tokenized_texts, window_size, texts_count):
    for text_num, tokens in enumerate(tokenized_texts):
        if texts_count and text_num >= texts_count:
            break
        for i in range(len(tokens)):
            central_word = torch.LongTensor(
                [vocabulary.get_index(tokens[i])]
            )
            context = torch.LongTensor(
                [
                    vocabulary.get_index(tokens[i + delta])
                    for delta in range(-window_size, window_size + 1)
                    if 0 <= (i + delta) < len(tokens)
                ]
            )
            # print(
            #     f"{tokens[i]=} {vocabulary.get_index(tokens[i])=} "
            #     f"{torch.LongTensor(vocabulary.get_index(tokens[i]))} "
            #     f"{central_word=} {context=}"
            # )
            # print(f"{central_word} {context}")
            if 2*window_size == context.shape[0]:
                yield central_word, context


def get_samples_cycle(tokenized_texts, window_size, texts_count):
    while True:
        for sample in get_samples(tokenized_texts, window_size, texts_count):
            yield sample


class Word2VecDataset(Dataset):
    def __init__(self, tokenized_texts, vocabulary, window_size=2, texts_count=100000):
        self.samples = list(get_samples(tokenized_texts, window_size, texts_count))
    
    def __len__(self):
        return len(self.samples)
    
    def __getitem__(self, index):
        return self.samples[index]


class Word2VecIterableDataset(IterableDataset):
    def __init__(self, tokenized_texts, vocabulary, window_size=2, texts_count=None):
        self.tokenized_texts = tokenized_texts
        self.vocabulary = vocabulary
        self.window_size = window_size
        self.texts_count = texts_count

    def __iter__(self):
        return get_samples_cycle(self.tokenized_texts, self.window_size, self.texts_count)


class SkipGramModel(LightningModule):
    def __init__(self, vocab_size, embedding_dim=128):
        super().__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.out_layer = nn.Linear(embedding_dim, vocab_size)
        self.loss = nn.CrossEntropyLoss()
        self.train_outputs = []
        self.val_outputs = []
        self.test_outputs = []
    
    def forward(self, centrals, contexts):
        projections = self.embeddings.forward(contexts).sum(axis=1)
        # print(f"{projections.shape=}")
        logits = self.out_layer.forward(projections)
        # logits = logits.transpose(1, 2)
        # logits = logits.unsqueeze(1)
        # print(f"{logits.shape=} {centrals.shape=}")
        loss = self.loss(logits, centrals.squeeze())
        return loss
    
    def training_step(self, batch, batch_nb):
        result = self(*batch)
        self.log("loss", result)
        return {'loss': result}
    
    def validation_step(self, batch, batch_nb):
        result = self(*batch)
        self.log("val_loss", result)  
        return {'val_loss': result}

    def test_step(self, batch, batch_nb):
        result = self(*batch)
        self.log("test_loss", result)
        return {'test_loss': self(*batch)}

    def on_train_batch_end(
        self,
        outputs: Union[torch.Tensor, Mapping[str, Any], NoneType],
        batch: Any,
        batch_idx: int,
        dataloader_idx: int = 0,
    ) -> None:
        self.train_outputs.append(outputs)
    
    def on_train_epoch_end(self):
        outputs = self.train_outputs
        avg_loss = torch.stack([x['loss'] for x in outputs]).mean()
        tensorboard_logs = {'loss': avg_loss}
        self.log("train_loss_epoch", avg_loss, on_step=False, on_epoch=True)
        return {'train_loss_epoch': avg_loss, 'progress_bar': tensorboard_logs}
    
    def on_validation_batch_end(
        self,
        outputs: Union[torch.Tensor, Mapping[str, Any], NoneType],
        batch: Any,
        batch_idx: int,
        dataloader_idx: int = 0,
    ) -> None:
        self.val_outputs.append(outputs)
    
    def on_validation_epoch_end(self):
        outputs = self.val_outputs
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        tensorboard_logs = {'val_loss': avg_loss}
        self.log("val_loss_epoch", avg_loss, on_step=False, on_epoch=True)
        return {'val_loss_epoch': avg_loss, 'progress_bar': tensorboard_logs}

    def on_test_batch_end(
        self,
        outputs: Union[torch.Tensor, Mapping[str, Any], NoneType],
        batch: Any,
        batch_idx: int,
        dataloader_idx: int = 0,
    ) -> None:
        self.test_outputs.append(outputs)
    
    def on_test_epoch_end(self):
        outputs = self.test_outputs 
        avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
        tensorboard_logs = {'test_loss': avg_loss}
        self.log("test_loss_epoch", avg_loss, on_step=False, on_epoch=True)
        return {'test_loss_epoch': avg_loss, 'progress_bar': tensorboard_logs}
    
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-4)
        return [optimizer]



In [5]:
log("BEGIN")
log("loading prepared data...")
with open("../data/prepared.pkl", "rb") as fp:
    prepared = pkl.load(fp)
vocabulary = prepared["vocabulary"]
texts = prepared["texts"]
contexts = prepared["contexts"]
test_texts = prepared["test_texts"]
del prepared
log("data loaded")


[2024-03-31 17:58:04.529524] BEGIN
[2024-03-31 17:58:04.529610] loading prepared data...
[2024-03-31 17:58:23.228757] data loaded


In [55]:
train_data = Word2VecIterableDataset(texts, vocabulary)
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE)
for i, data in enumerate(train_loader):
    print(data[0].shape, data[1].shape)
    if 5<i:
        break

torch.Size([256, 1]) torch.Size([256, 4])
torch.Size([256, 1]) torch.Size([256, 4])
torch.Size([256, 1]) torch.Size([256, 4])
torch.Size([256, 1]) torch.Size([256, 4])
torch.Size([256, 1]) torch.Size([256, 4])
torch.Size([256, 1]) torch.Size([256, 4])
torch.Size([256, 1]) torch.Size([256, 4])


In [16]:
data.shape

torch.Size([579])

In [None]:
random.shuffle(texts)
train_data = Word2VecIterableDataset(texts, vocabulary)
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE)
random.shuffle(test_texts)
val_data = Word2VecIterableDataset(test_texts, vocabulary)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE)
model = SkipGramModel(vocabulary.size)
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=0.0,
    patience=5,
    verbose=True,
    mode="min",
)
ckpt_callback = ModelCheckpoint(
    monitor="val_loss",
    dirpath="ckpt",
    filename=f"{FNAME}-{{epoch}}-{{val_loss:.2f}}",
    save_top_k=3,
    mode="min",
    save_last=True
)
trainer = Trainer(
    max_epochs=EPOCHS,
    callbacks=[early_stop_callback, ckpt_callback],
    limit_train_batches=2 if DEBUG else 40000,
    limit_val_batches=2 if DEBUG else 500,
    val_check_interval=1 if DEBUG else 2000,
    # enable_progress_bar=False,
)
log("training...")
trainer.fit(model, train_loader, val_loader)
log("END")

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name       | Type             | Params
------------------------------------------------
0 | embeddings | Embedding        | 9.1 M 
1 | out_layer  | Linear           | 9.2 M 
2 | loss       | CrossEntropyLoss | 0     
------------------------------------------------
18.3 M    Trainable params
0         Non-trainable params
18.3 M    Total params
73.179    Total estimated model params size (MB)


[2024-03-31 19:55:50.403495] training...
Epoch 0:   5%|██▉                                                        | 2000/40000 [03:09<1:00:02, 10.55it/s, v_num=39]
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 43.87it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 31.03it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 27.79it/s][A
Validation DataLoader 0:   1%|▍                                                 

Metric val_loss improved. New best score: 6.210



Epoch 0:  10%|█████▉                                                     | 4000/40000 [06:45<1:00:46,  9.87it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 43.78it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 31.08it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 28.13it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.84

Metric val_loss improved by 1.222 >= min_delta = 0.0. New best score: 4.988



Epoch 0:  15%|█████████▏                                                   | 6000/40000 [10:21<58:39,  9.66it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 43.83it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:15, 31.52it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 28.81it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 27.53

Metric val_loss improved by 0.648 >= min_delta = 0.0. New best score: 4.340



Epoch 0:  20%|████████████▏                                                | 8000/40000 [13:58<55:54,  9.54it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 44.54it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:15, 31.85it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 28.86it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 27.52

Metric val_loss improved by 0.420 >= min_delta = 0.0. New best score: 3.920



Epoch 0:  25%|███████████████                                             | 10000/40000 [17:33<52:40,  9.49it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 43.95it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:15, 31.26it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 28.35it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 27.24

Metric val_loss improved by 0.313 >= min_delta = 0.0. New best score: 3.607



Epoch 0:  30%|██████████████████                                          | 12000/40000 [21:10<49:23,  9.45it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 42.17it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.72it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 27.95it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:19, 25.44

Metric val_loss improved by 0.246 >= min_delta = 0.0. New best score: 3.361



Epoch 0:  35%|█████████████████████                                       | 14000/40000 [24:49<46:06,  9.40it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:10, 45.96it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:15, 32.57it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 29.00it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 27.55

Metric val_loss improved by 0.190 >= min_delta = 0.0. New best score: 3.170



Epoch 0:  40%|████████████████████████                                    | 16000/40000 [28:28<42:42,  9.37it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 44.08it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:15, 31.42it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 28.51it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 27.24

Metric val_loss improved by 0.156 >= min_delta = 0.0. New best score: 3.014



Epoch 0:  45%|███████████████████████████                                 | 18000/40000 [32:05<39:13,  9.35it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 42.04it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.71it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 28.04it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.96

Metric val_loss improved by 0.133 >= min_delta = 0.0. New best score: 2.881



Epoch 0:  50%|██████████████████████████████                              | 20000/40000 [35:43<35:43,  9.33it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 43.07it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.91it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 28.21it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 27.04

Metric val_loss improved by 0.110 >= min_delta = 0.0. New best score: 2.771



Epoch 0:  55%|█████████████████████████████████                           | 22000/40000 [39:22<32:12,  9.31it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 44.20it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 31.05it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 28.36it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 27.17

Metric val_loss improved by 0.095 >= min_delta = 0.0. New best score: 2.676



Epoch 0:  60%|████████████████████████████████████                        | 24000/40000 [43:03<28:42,  9.29it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:14, 34.78it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:19, 26.05it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:20, 23.82it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:21, 23.37

Metric val_loss improved by 0.084 >= min_delta = 0.0. New best score: 2.592



Epoch 0:  65%|███████████████████████████████████████                     | 26000/40000 [46:54<25:15,  9.24it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:12, 40.60it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 29.42it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 26.68it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:19, 25.45

Metric val_loss improved by 0.074 >= min_delta = 0.0. New best score: 2.519



Epoch 0:  70%|██████████████████████████████████████████                  | 28000/40000 [50:42<21:44,  9.20it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 42.85it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.60it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 27.61it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.28

Metric val_loss improved by 0.064 >= min_delta = 0.0. New best score: 2.454



Epoch 0:  75%|█████████████████████████████████████████████               | 30000/40000 [54:29<18:09,  9.18it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:12, 41.47it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:17, 29.24it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 26.65it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:19, 25.79

Metric val_loss improved by 0.056 >= min_delta = 0.0. New best score: 2.398



Epoch 0:  80%|████████████████████████████████████████████████            | 32000/40000 [58:15<14:33,  9.15it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 42.09it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.19it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.49it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.35

Metric val_loss improved by 0.054 >= min_delta = 0.0. New best score: 2.345



Epoch 0:  85%|█████████████████████████████████████████████████▎        | 34000/40000 [1:02:01<10:56,  9.14it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 42.20it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.44it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.52it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.13

Metric val_loss improved by 0.046 >= min_delta = 0.0. New best score: 2.299



Epoch 0:  90%|████████████████████████████████████████████████████▏     | 36000/40000 [1:05:47<07:18,  9.12it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 41.83it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.18it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.49it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.37

Metric val_loss improved by 0.041 >= min_delta = 0.0. New best score: 2.258



Epoch 0:  95%|███████████████████████████████████████████████████████   | 38000/40000 [1:09:33<03:39,  9.10it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 42.29it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.35it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.33it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:19, 26.08

Metric val_loss improved by 0.040 >= min_delta = 0.0. New best score: 2.218



Epoch 0:  99%|█████████████████████████████████████████████████████████▋| 39747/40000 [1:12:56<00:27,  9.08it/s, v_num=39][A

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Epoch 1:   5%|██▉                                                        | 2000/40000 [03:21<1:03:50,  9.92it/s, v_num=39]
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 42.27it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.62it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.38it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:19, 26.04it/s

Metric val_loss improved by 0.025 >= min_delta = 0.0. New best score: 2.159



Epoch 1:  10%|█████▉                                                     | 4000/40000 [07:07<1:04:11,  9.35it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:12, 40.51it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 29.65it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:19, 25.59it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:19, 24.93

Metric val_loss improved by 0.033 >= min_delta = 0.0. New best score: 2.126



Epoch 1:  15%|████████▊                                                  | 6000/40000 [10:53<1:01:42,  9.18it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:12, 41.46it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 29.88it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.10it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:19, 25.92

Metric val_loss improved by 0.030 >= min_delta = 0.0. New best score: 2.096



Epoch 1:  20%|████████████▏                                                | 8000/40000 [14:42<58:51,  9.06it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 42.91it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.42it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.50it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.29

Metric val_loss improved by 0.023 >= min_delta = 0.0. New best score: 2.073



Epoch 1:  25%|███████████████                                             | 10000/40000 [18:32<55:36,  8.99it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:12, 40.09it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 29.76it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 26.98it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:19, 25.47

Metric val_loss improved by 0.025 >= min_delta = 0.0. New best score: 2.048



Epoch 1:  30%|██████████████████                                          | 12000/40000 [22:21<52:10,  8.95it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 43.03it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.60it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 27.63it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.35

Metric val_loss improved by 0.025 >= min_delta = 0.0. New best score: 2.023



Epoch 1:  35%|█████████████████████                                       | 14000/40000 [26:12<48:41,  8.90it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 42.14it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.19it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.32it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:19, 26.02

Metric val_loss improved by 0.019 >= min_delta = 0.0. New best score: 2.005



Epoch 1:  40%|████████████████████████                                    | 16000/40000 [30:00<45:00,  8.89it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:12, 41.46it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 29.98it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.39it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:19, 26.09

Metric val_loss improved by 0.020 >= min_delta = 0.0. New best score: 1.985



Epoch 1:  45%|███████████████████████████                                 | 18000/40000 [33:48<41:19,  8.87it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 41.94it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.31it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.42it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:19, 26.09

Metric val_loss improved by 0.021 >= min_delta = 0.0. New best score: 1.964



Epoch 1:  50%|██████████████████████████████                              | 20000/40000 [37:40<37:40,  8.85it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:12, 40.19it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:17, 28.94it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 26.29it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:19, 25.06

Metric val_loss improved by 0.015 >= min_delta = 0.0. New best score: 1.949



Epoch 1:  55%|█████████████████████████████████                           | 22000/40000 [41:27<33:55,  8.84it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 41.94it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.06it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.33it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.13

Metric val_loss improved by 0.016 >= min_delta = 0.0. New best score: 1.933



Epoch 1:  60%|████████████████████████████████████                        | 24000/40000 [45:16<30:11,  8.83it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 42.01it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.72it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 27.61it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.37

Metric val_loss improved by 0.017 >= min_delta = 0.0. New best score: 1.916



Epoch 1:  65%|███████████████████████████████████████                     | 26000/40000 [49:06<26:26,  8.82it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 42.85it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.70it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.61it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.37

Metric val_loss improved by 0.014 >= min_delta = 0.0. New best score: 1.903



Epoch 1:  70%|██████████████████████████████████████████                  | 28000/40000 [52:54<22:40,  8.82it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:12, 40.88it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:17, 29.00it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 26.60it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:19, 25.40

Metric val_loss improved by 0.013 >= min_delta = 0.0. New best score: 1.889



Epoch 1:  75%|█████████████████████████████████████████████               | 30000/40000 [56:42<18:54,  8.82it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 43.04it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.74it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 27.70it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.23

Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 1.877



Epoch 1:  80%|██████████████████████████████████████████████▍           | 32000/40000 [1:00:29<15:07,  8.82it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:12, 41.23it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:17, 28.51it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 26.39it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:19, 24.85

Metric val_loss improved by 0.013 >= min_delta = 0.0. New best score: 1.864



Epoch 1:  85%|█████████████████████████████████████████████████▎        | 34000/40000 [1:04:19<11:21,  8.81it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 41.76it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.40it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 27.85it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.45

Metric val_loss improved by 0.010 >= min_delta = 0.0. New best score: 1.853



Epoch 1:  90%|████████████████████████████████████████████████████▏     | 36000/40000 [1:08:04<07:33,  8.81it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 43.54it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:15, 31.17it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 28.38it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 27.24

Metric val_loss improved by 0.010 >= min_delta = 0.0. New best score: 1.843



Epoch 1:  95%|███████████████████████████████████████████████████████   | 38000/40000 [1:11:51<03:46,  8.81it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 43.40it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 31.08it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 27.97it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.50

Metric val_loss improved by 0.011 >= min_delta = 0.0. New best score: 1.832



Epoch 1: 100%|██████████████████████████████████████████████████████████| 40000/40000 [1:15:37<00:00,  8.82it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:12, 41.23it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.11it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.30it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.11

Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 1.824



Epoch 2:   5%|██▉                                                        | 2000/40000 [03:20<1:03:35,  9.96it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 42.35it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.59it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.52it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.31

Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 1.823



Epoch 2:  10%|█████▉                                                     | 4000/40000 [07:04<1:03:43,  9.41it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 42.33it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.08it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.23it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:19, 26.04

Metric val_loss improved by 0.013 >= min_delta = 0.0. New best score: 1.810



Epoch 2:  15%|████████▊                                                  | 6000/40000 [10:47<1:01:10,  9.26it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 43.95it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.82it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 28.03it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.74

Metric val_loss improved by 0.011 >= min_delta = 0.0. New best score: 1.799



Epoch 2:  20%|████████████▏                                                | 8000/40000 [14:31<58:04,  9.18it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 41.81it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.43it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.38it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:19, 26.08

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 1.795



Epoch 2:  25%|███████████████                                             | 10000/40000 [18:16<54:50,  9.12it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 41.60it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 29.94it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.23it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:19, 26.10

Metric val_loss improved by 0.009 >= min_delta = 0.0. New best score: 1.786



Epoch 2:  30%|██████████████████                                          | 12000/40000 [22:03<51:28,  9.07it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 41.88it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.52it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.61it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.37

Metric val_loss improved by 0.009 >= min_delta = 0.0. New best score: 1.777



Epoch 2:  35%|█████████████████████                                       | 14000/40000 [25:49<47:57,  9.04it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 42.57it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.38it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.55it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.24

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 1.773



Epoch 2:  40%|████████████████████████                                    | 16000/40000 [29:34<44:22,  9.02it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 43.95it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.98it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 28.08it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.87

Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 1.767



Epoch 2:  45%|███████████████████████████                                 | 18000/40000 [33:18<40:42,  9.01it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 43.09it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.76it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 28.06it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.79

Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 1.758



Epoch 2:  50%|██████████████████████████████                              | 20000/40000 [37:04<37:04,  8.99it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 41.87it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.19it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:18, 27.42it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.30

Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 1.755



Epoch 2:  55%|█████████████████████████████████                           | 22000/40000 [40:50<33:24,  8.98it/s, v_num=39][A
Validation: |                                                                                       | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                 | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                    | 0/500 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                            | 1/500 [00:00<00:11, 42.66it/s][A
Validation DataLoader 0:   0%|▏                                                           | 2/500 [00:00<00:16, 30.66it/s][A
Validation DataLoader 0:   1%|▎                                                           | 3/500 [00:00<00:17, 27.81it/s][A
Validation DataLoader 0:   1%|▍                                                           | 4/500 [00:00<00:18, 26.70

In [25]:
for x in get_samples(texts, window_size=2, texts_count=7):
    break

In [29]:
x[0].shape

torch.Size([21])

In [28]:
x[1]

tensor([  21, 8642,    3])

In [34]:
vocabulary.get_index("для")

21

In [77]:
DEBUG

False

In [78]:
Trainer?

[0;31mInit signature:[0m
[0mTrainer[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0maccelerator[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mpytorch_lightning[0m[0;34m.[0m[0maccelerators[0m[0;34m.[0m[0maccelerator[0m[0;34m.[0m[0mAccelerator[0m[0;34m][0m [0;34m=[0m [0;34m'auto'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mstrategy[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mpytorch_lightning[0m[0;34m.[0m[0mstrategies[0m[0;34m.[0m[0mstrategy[0m[0;34m.[0m[0mStrategy[0m[0;34m][0m [0;34m=[0m [0;34m'auto'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdevices[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mList[0m[0;34m[[0m[0mint[0m[0;34m][0m[0;34m,[0m [0mstr[0m[0;34m,[0m [0mint[0m[0;34m][0m [0;34m=[0m [0;34m'auto'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnum_nodes[0m[0;34m:[0m [0mint[0m [0;34m=[0m [0;36m1[0m[0;34m,[0m[0;34