In [1]:
import torch
import numpy as np
from data import get_dataloaders
from torch.utils.data import Dataset, DataLoader, random_split
from transformers import BertTokenizer, BertForSequenceClassification

In [2]:
num_epochs = 2
lr = 1e-4

In [4]:
train_loader, test_loader, num_labels, dataset = get_dataloaders(
    file="data/data_sample.csv",
    tokenizer="bert",
    batch_size=7
)

In [5]:
# Load a pretrained model from a checkpoint
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=num_labels)

# Change activation to sigmoid for multi-label classification
model.classifier.activation = torch.nn.Sigmoid()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [5]:
optimizer = torch.optim.Adam(model.parameters(), lr = lr)
criterion = torch.nn.BCEWithLogitsLoss()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


# Fine tune the model for the task
for epoch in range(num_epochs):
    print(f'Epoch {epoch}')

    model.train()
    train_loss = 0.0
    for batch in train_loader:
        texts = batch["texts"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        model.zero_grad()
        outputs = model(texts, attention_mask=attention_mask)
        loss = criterion(outputs.logits, labels)

        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    print(f"Train loss: {train_loss:.4f}")

Epoch 0
Train loss: 2.6049
Epoch 1
Train loss: 2.1096


In [6]:
# Test
model.eval()
with torch.no_grad():
    for batch in test_loader:
        texts = batch["texts"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        outputs = model(texts, attention_mask=attention_mask)
        
        predictions = (outputs.logits >= 0.5).float()

        accuracy = torch.sum(predictions == labels).float() / predictions.numel()
        print(f"batch accuracy: {accuracy:.4f}")

batch accuracy: 0.8963
