In [1]:
import sys
import torch
import pandas as pd
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.utils.data.sampler import SubsetRandomSampler

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from transformers import BertTokenizer, BertModel
from transformers import RobertaTokenizer, RobertaModel
from transformers import DataCollatorForLanguageModeling

In [3]:
import sys

# Add the parent directory to the import search path; the next cell imports
# from the `src` package. Guarded so that re-running this cell does not keep
# appending duplicate ".." entries to sys.path.
if ".." not in sys.path:
    sys.path.append("..")

In [4]:
from src.model import RecoBERT
from src.data import CollatorWrapper, RecoDataset, RobertaTokenizerWrapper
from src.train import train

In [5]:
# --- Optimizer settings (consumed by the Adam constructor below) ---
lr = 1e-4                    # learning rate
l2_reg = 0.0                 # passed to Adam as weight_decay
beta1, beta2 = 0.9, 0.999    # passed to Adam as betas=(beta1, beta2)

# --- Training / data-loading settings ---
epochs = 100                 # passed to train() as `epochs`
batch_size = 32              # DataLoader batch size
workers = 8                  # DataLoader num_workers

In [6]:
# SlovakBERT (https://arxiv.org/abs/2109.15254)
# One hub identifier shared by the tokenizer and the encoder weights.
slovakbert_name = 'gerulata/slovakbert'

# Pretrained artefacts: tokenizer and encoder.
tokenizer = RobertaTokenizer.from_pretrained(slovakbert_name)
bert = RobertaModel.from_pretrained(slovakbert_name)

# Project wrapper around the tokenizer, later handed to the collate wrapper.
tokenize_fn = RobertaTokenizerWrapper(tokenizer)

# HuggingFace language-modeling collator, combined with the tokenizer wrapper
# into the collate_fn used by the DataLoaders.
collator = DataCollatorForLanguageModeling(tokenizer)
collate_fn = CollatorWrapper(tokenize_fn, collator)

Some weights of the model checkpoint at gerulata/slovakbert were not used when initializing RobertaModel: ['lm_head.decoder.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at gerulata/slovakbert and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it

In [7]:
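# BERT (https://arxiv.org/pdf/1810.04805.pdf)
# Alternative backbone, intentionally left disabled. Uncomment the lines below
# in place of the SlovakBERT setup cell to use it.
# NOTE(review): the SlovakBERT cell passes a RobertaTokenizerWrapper
# (`tokenize_fn`) to CollatorWrapper, whereas the last line here passes the raw
# tokenizer — confirm CollatorWrapper accepts both before enabling.
# tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
# bert = BertModel.from_pretrained("bert-base-cased")
# collator = DataCollatorForLanguageModeling(tokenizer)
# collate_fn = CollatorWrapper(tokenizer, collator)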

In [8]:
# Read the fashion CSV, using its first column as the DataFrame index.
fashion_csv = "../data/fashion.csv"
df = pd.read_csv(fashion_csv, index_col=0)

# Wrap the frame in the project dataset.
# NOTE(review): `swap_prob` semantics live in src.data.RecoDataset — confirm there.
dataset = RecoDataset(
    df=df,
    swap_prob=0.5,
)

In [9]:
# Contiguous 60/20/20 train/val/test partition by dataset position.
# NOTE(review): indices are NOT shuffled before the cut, so the partition
# follows the row order of the underlying data — confirm that ordering does
# not bias the splits.
n_samples = len(dataset)
idxs = list(range(n_samples))
trainval_split = int(n_samples * 0.6)
valtest_split = int(n_samples * 0.8)

train_idxs = idxs[:trainval_split]
val_idxs = idxs[trainval_split:valtest_split]
# Held-out remainder: the boundary was already computed, so name the split
# explicitly and keep it available for a later evaluation.
test_idxs = idxs[valtest_split:]

# Fail early with a clear message instead of producing empty loaders.
assert train_idxs and val_idxs, "dataset too small for a 60/20/20 split"
assert len(train_idxs) + len(val_idxs) + len(test_idxs) == n_samples

# Each sampler draws, in random order, only from its own index subset.
train_sampler = SubsetRandomSampler(train_idxs)
val_sampler = SubsetRandomSampler(val_idxs)

# Both loaders share everything except the sampler.
loader_kwargs = dict(batch_size=batch_size, collate_fn=collate_fn, num_workers=workers)
train_loader = DataLoader(dataset, sampler=train_sampler, **loader_kwargs)
val_loader = DataLoader(dataset, sampler=val_sampler, **loader_kwargs)

In [10]:
# Build the project model from the pretrained encoder and the tokenizer's
# reported vocabulary size.
vocab_size = tokenizer.vocab_size
model = RecoBERT(bert, vocab_size)

In [11]:
# Pull a single batch from the training loader. `batch` is not referenced by
# any later cell visible in this notebook, so this looks like a manual sanity
# check of the data pipeline — NOTE(review): confirm, and drop if unneeded.
batch = next(iter(train_loader))

In [12]:
# Select the compute device. GPU ordinal 3 is preferred (as in the original
# run), but only when the host actually exposes that many devices; otherwise
# fall back to the first GPU, or to the CPU when CUDA is unavailable.
preferred_gpu = 3
if not torch.cuda.is_available():
    device = "cpu"
elif torch.cuda.device_count() > preferred_gpu:
    device = f"cuda:{preferred_gpu}"
else:
    device = "cuda:0"
print(f"Using {device} device")

# Move the model's parameters to the selected device.
model = model.to(device)

Using cuda:3 device


In [13]:
# Adam over all model parameters, configured from the hyperparameter cell.
adam_betas = (beta1, beta2)
optim = Adam(
    model.parameters(),
    lr=lr,
    betas=adam_betas,
    weight_decay=l2_reg,
)

In [14]:
# Collect the arguments for the project training routine in one place, then
# run it. The returned object replaces `model`.
train_config = {
    "model": model,
    "train_loader": train_loader,
    "val_loader": val_loader,
    "optim": optim,
    "epochs": epochs,
    "device": device,
    "checkpoint": "./checkpoint",
    "early_stop": 20,
}
model = train(**train_config)

12:51:40 - Epoch 000: Train Loss = 3.9701
12:52:06 - Epoch 000: Val Loss = 2.1140
12:55:08 - Epoch 001: Train Loss = 1.6882
12:55:34 - Epoch 001: Val Loss = 1.4403
12:58:33 - Epoch 002: Train Loss = 1.2563
12:58:59 - Epoch 002: Val Loss = 1.1529
13:01:58 - Epoch 003: Train Loss = 1.0352
13:02:25 - Epoch 003: Val Loss = 1.0815
13:05:23 - Epoch 004: Train Loss = 0.8889
13:05:49 - Epoch 004: Val Loss = 0.9260
13:08:48 - Epoch 005: Train Loss = 0.8336
13:09:15 - Epoch 005: Val Loss = 0.9012
13:12:14 - Epoch 006: Train Loss = 0.7417
13:12:41 - Epoch 006: Val Loss = 0.8189
13:15:40 - Epoch 007: Train Loss = 0.6823
13:16:06 - Epoch 007: Val Loss = 0.7423
13:19:05 - Epoch 008: Train Loss = 0.6333
13:19:31 - Epoch 008: Val Loss = 0.7846
13:22:30 - Epoch 009: Train Loss = 0.6197
13:22:57 - Epoch 009: Val Loss = 0.7339
13:25:56 - Epoch 010: Train Loss = 0.5940
13:26:23 - Epoch 010: Val Loss = 0.6797
13:29:22 - Epoch 011: Train Loss = 0.5619
13:29:49 - Epoch 011: Val Loss = 0.6585
13:32:47 - Epoch

In [1]:
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Scratch tensor: 2 rows x 5 columns of standard-normal samples.
tst = torch.randn((2, 5))

In [12]:
# Slicing keeps the leading dimension: the result is still 2-D, with one row.
tst[:1]

tensor([[1.0974, 0.7633, 0.4672, 1.1625, 0.4084]])

In [13]:
# Integer indexing drops the leading dimension: the result is the first row as a 1-D tensor.
tst[0, :]

tensor([1.0974, 0.7633, 0.4672, 1.1625, 0.4084])