# First attempt at creating a new representation method for candidates

In [1]:
# %load_ext lab_black

### Imports

In [1]:
import os
import yaml

import torch
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from model.encoder import CandidateEncoderConfig
from model.decoder import CandidateDecoderConfig
from model.candidate_vae import CandidateVAE
from trainer.trainer import BetaVaeTrainer, TrainerConfig
from config.general_config import GeneralConfig
from dataset.utils import pad_collate
from dataset.dataset import SellersDataset

### Constants

In [2]:
# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Read the "vae" section of the project configuration file.
# A file that cannot be parsed must stop the notebook right here: merely
# printing the error would leave `config` undefined and every later cell would
# fail with an unrelated NameError instead of pointing at the broken file.
with open("config/config.yaml", "r") as file:
    try:
        config = yaml.safe_load(file)["vae"]
    except yaml.YAMLError as exc:
        raise RuntimeError(
            f"config/config.yaml is not valid YAML: {exc}"
        ) from exc

# The "general" section is shared by every component config below.
general_section = config["general"]
general_config = GeneralConfig(**general_section)

# Each component config is its own section merged with the general one.
# The general section is unpacked last, so on a key clash its value wins.
encoder_config = CandidateEncoderConfig(**{**config["encoder"], **general_section})
decoder_config = CandidateDecoderConfig(**{**config["decoder"], **general_section})
trainer_config = TrainerConfig(**{**config["trainer"], **general_section})

# TensorBoard event files go into a "runs" folder inside the checkpoints directory.
checkpoints_root = general_config.checkpoints_dir
log_dir = os.path.join(checkpoints_root, "runs")
os.makedirs(log_dir, exist_ok=True)  # no error if the folder already exists

writer_tensorboard = SummaryWriter(log_dir=log_dir)

In [3]:
# Build the dataset object from the general settings, then load its contents.
# NOTE(review): `datset_path` looks like a misspelling of `dataset_path`; it has
# to match the attribute name declared on GeneralConfig - confirm before renaming.
dataset = SellersDataset(
    # data locations
    dataset_path=general_config.datset_path,
    raw_data_path=general_config.raw_data_path,
    test_index=general_config.dataset,
    # text representation options
    embedder_name=general_config.embedder_name,
    nn_embedding_size=encoder_config.lstm_hidden_dim,
    bow_remove_stopwords=general_config.bow_remove_stopwords,
    bow_remove_sentiment=general_config.bow_remove_sentiment,
    trim_tr=general_config.trim_tr,
    # runtime
    device=DEVICE,
)

# Presumably only needed when the dataset has not been prepared yet - TODO confirm.
# dataset.prepare_dataset()
dataset.load_dataset()

Loading dataset data/dataset/...
[2022-06-02 07:42:17,769] {dataset.py:251} INFO - Loading dataset data/dataset/...
Loaded dataset data/dataset/!
[2022-06-02 07:42:19,792] {dataset.py:279} INFO - Loaded dataset data/dataset/!




## Prepare data

### Prepare dataloader

In [4]:
# Collate callable built from the vocabulary's pad token.
padding_collate = pad_collate(dataset.vocab.pad_token)

# Batches are drawn from the dataset with the batch size set in the general config.
dataloader = DataLoader(
    dataset,
    collate_fn=padding_collate,
    batch_size=general_config.batch_size,
)

# Load models to test them

In [5]:
# Instantiate the VAE from the three configs plus the dataset's vocabulary and embedder.
candidate_vae = CandidateVAE(
    general_config,
    encoder_config,
    decoder_config,
    dataset.vocab,
    dataset.embedder,
)
# Move the model to the selected device.
candidate_vae = candidate_vae.to(DEVICE)

# Check trainer

In [6]:
# The trainer gets the model, both configs, the batches to iterate over and the
# TensorBoard writer created in the constants cell.
trainer = BetaVaeTrainer(
    candidate_vae, general_config, trainer_config, dataloader, writer_tensorboard
)

Initializing BetaVaeTrainer...
[2022-06-02 07:42:23,548] {trainer.py:246} INFO - Initializing BetaVaeTrainer...
Done: BetaVaeTrainer initialized!
[2022-06-02 07:42:23,561] {trainer.py:358} INFO - Done: BetaVaeTrainer initialized!


In [9]:
# Run the training loop. This is the expensive cell: the recorded run below
# logged 6 epochs of 1251 iterations, each epoch taking roughly 30-35 minutes.
# NOTE(review): the recorded output carries earlier timestamps than the trainer
# initialisation above, so it comes from a previous session - re-run to refresh.
trainer.fit()

Training loop...
[2022-06-01 15:09:13,978] {trainer.py:741} INFO - Training loop...
Epoch 1/6
[2022-06-01 15:09:13,979] {trainer.py:743} INFO - Epoch 1/6


100%|██████████| 1251/1251 [33:49<00:00,  1.62s/it]

Epoch 2/6
[2022-06-01 15:43:03,646] {trainer.py:743} INFO - Epoch 2/6



100%|██████████| 1251/1251 [33:27<00:00,  1.60s/it]

Epoch 3/6
[2022-06-01 16:16:31,045] {trainer.py:743} INFO - Epoch 3/6



100%|██████████| 1251/1251 [34:32<00:00,  1.66s/it]

Epoch 4/6
[2022-06-01 16:51:03,054] {trainer.py:743} INFO - Epoch 4/6



100%|██████████| 1251/1251 [31:40<00:00,  1.52s/it]

Epoch 5/6
[2022-06-01 17:22:43,920] {trainer.py:743} INFO - Epoch 5/6



100%|██████████| 1251/1251 [34:37<00:00,  1.66s/it]

Epoch 6/6
[2022-06-01 17:57:21,341] {trainer.py:743} INFO - Epoch 6/6



100%|██████████| 1251/1251 [29:30<00:00,  1.42s/it]
