In [4]:
import torch
import pandas as pd
import numpy as np
from tqdm import tqdm
from pytorch_pretrained_bert import BertModel, BertTokenizer

In [5]:
# Load the tokenizer and the encoder inside this cell so that it runs on a
# fresh kernel instead of relying on `tokenizer` / `model` left over from an
# earlier session.
from transformers import RobertaModel, RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
model = RobertaModel.from_pretrained("roberta-base")

# Encode one example sentence as PyTorch tensors and feed the encoding to the
# model as keyword arguments.
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
outputs = model(**inputs)

# Keep the `last_hidden_state` field of the model output for inspection.
last_hidden_states = outputs.last_hidden_state

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [6]:
from transformers import RobertaTokenizer, RobertaModel, RobertaConfig


def get_tokenizer(model_name="roberta-base"):
    """Return a pretrained RoBERTa tokenizer.

    Args:
        model_name: Checkpoint identifier forwarded to
            ``RobertaTokenizer.from_pretrained``. Defaults to
            ``"roberta-base"``, the value that was previously hard-coded.

    Returns:
        The object returned by ``RobertaTokenizer.from_pretrained(model_name)``.
    """
    return RobertaTokenizer.from_pretrained(model_name)

def get_pretrained_model(model_name="roberta-base"):
    """Return a pretrained RoBERTa encoder.

    Args:
        model_name: Checkpoint identifier forwarded to
            ``RobertaModel.from_pretrained``. Defaults to ``"roberta-base"``,
            the value that was previously hard-coded.

    Returns:
        The object returned by ``RobertaModel.from_pretrained(model_name)``.
    """
    return RobertaModel.from_pretrained(model_name)


In [None]:
import os
import torch
from dataset import get_dataloaders


# Training driver: build (or resume) the model and optimizer, then train and
# validate for the configured number of epochs, checkpointing after each one.
#
# This code reads `config`, `get_pretrained_model`, `train_one_epoch` and
# `evaluate` from the notebook namespace.
# NOTE(review): `train_one_epoch` and `evaluate` must already be defined when
# the epoch loop below runs; on a fresh kernel, execute their definitions
# before this code.

# Use the current CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device(f"cuda:{torch.cuda.current_device()}")
else:
    device = torch.device("cpu")

model = get_pretrained_model()
model.to(device)

# Resolve both checkpoint locations once so that loading and saving are
# guaranteed to agree on the file names.
checkpoint_path_last = os.path.join(
    config["paths"]["checkpoints"],
    config["training_parameters"]["checkpoints_name_last"],
)
checkpoint_path_best = os.path.join(
    config["paths"]["checkpoints"],
    config["training_parameters"]["checkpoints_name_best"],
)

if config["training_parameters"]["resume"]:
    # SECURITY: these checkpoints hold pickled model/optimizer objects, so
    # torch.load executes pickle code; only load files from a trusted source.
    # NOTE(review): recent PyTorch releases default `weights_only=True`, which
    # rejects pickled modules; confirm against the installed version.
    # `map_location` keeps the restored tensors on the device selected above,
    # even if the checkpoint was written from a different one.
    checkpoint_last = torch.load(checkpoint_path_last, map_location=device)
    checkpoint_best = torch.load(checkpoint_path_best, map_location=device)
    model = checkpoint_last["model"]
    # The restored model replaces the one created above, so it has to be
    # placed on the device as well.
    model.to(device)
    optimizer = checkpoint_last["optimizer"]
    # The stored epoch is the last one that finished (it is written after the
    # epoch's training and validation), so resume from the following epoch.
    epoch_start = checkpoint_last["epoch"] + 1
    val_loss_best = checkpoint_best["val_loss"]

else:
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=config["training_parameters"]["learning_rate"],
    )
    epoch_start = 0
    val_loss_best = None

train_loader, valid_loader, test_loader = get_dataloaders(config)

# The loss object comes from the configuration and is moved to the device.
criterion = config["training_parameters"]["loss"]
criterion = criterion.to(device)

for epoch in range(epoch_start, config["training_parameters"]["nb_epochs"]):
    train_loss = train_one_epoch(
        model, train_loader, optimizer, criterion
    )
    val_loss = evaluate(model, valid_loader, criterion)
    print(
        f"epoch: {epoch} training loss: {train_loss:.3f} validation loss: {val_loss:.3f}"
    )
    # Save model
    checkpoint = {
        "epoch": epoch,
        "model": model,
        "optimizer": optimizer,
        "val_loss": val_loss,
    }
    # Always refresh the "last" checkpoint so training can be resumed.
    torch.save(checkpoint, checkpoint_path_last)
    # Additionally keep the checkpoint with the lowest validation loss so far
    # under the "best" name, which is the file read back when resuming.
    if val_loss_best is None or val_loss < val_loss_best:
        val_loss_best = val_loss
        torch.save(checkpoint, checkpoint_path_best)


def train_one_epoch(model, train_loader, optimizer, loss_fn):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Module to train; it is switched to train mode and called as
            ``model(**inputs)`` for every batch.
        train_loader: Iterable yielding ``(inputs, target)`` pairs, where
            ``inputs`` is a mapping unpacked as keyword arguments to ``model``.
            Batches are passed through as yielded; no device transfer is done
            here.
        optimizer: Optimizer whose ``zero_grad`` / ``step`` are called once
            per batch.
        loss_fn: Callable invoked as ``loss_fn(output, target)``; it must
            return a scalar tensor supporting ``.item()`` and ``.backward()``.
            NOTE(review): the loss call was previously commented out and
            referred to a student/teacher pair this function never receives;
            ``loss_fn(output, target)`` is assumed to be the intended call.
            Confirm against the configured loss.

    Returns:
        The arithmetic mean of the per-batch loss values as a float.

    Raises:
        ZeroDivisionError: If ``train_loader`` yields no batches.
    """
    batch_losses = []
    model.train()  # switch to train mode

    for inputs, target in train_loader:
        # forward pass
        output = model(**inputs)
        loss = loss_fn(output, target)
        batch_losses.append(loss.item())

        # backward pass: clear stale gradients before accumulating new ones
        optimizer.zero_grad()
        loss.backward()
        # update weights
        optimizer.step()

    return sum(batch_losses) / len(batch_losses)


def evaluate(model, loader, loss_fn):
    """Compute the mean loss of ``model`` over ``loader`` without training.

    Args:
        model: Module to evaluate; it is switched to eval mode and called as
            ``model(**inputs)`` for every batch.
        loader: Iterable yielding ``(inputs, target)`` pairs, where ``inputs``
            is a mapping unpacked as keyword arguments to ``model``. Batches
            are passed through as yielded; no device transfer is done here.
        loss_fn: Callable invoked as ``loss_fn(output, target)``; it must
            return a scalar tensor supporting ``.item()``.
            NOTE(review): the loss call was previously commented out and
            referred to a student/teacher pair this function never receives;
            ``loss_fn(output, target)`` is assumed to be the intended call.
            Confirm against the configured loss.

    Returns:
        The arithmetic mean of the per-batch loss values as a float.

    Raises:
        ZeroDivisionError: If ``loader`` yields no batches.
    """
    batch_losses = []
    model.eval()  # switch to eval mode

    # Gradients are not needed for evaluation, so disable tracking for both
    # the forward pass and the loss computation.
    with torch.no_grad():
        for inputs, target in loader:
            output = model(**inputs)
            loss = loss_fn(output, target)
            batch_losses.append(loss.item())

    return sum(batch_losses) / len(batch_losses)
