In [1]:
%load_ext autoreload
%autoreload 2

import os

# Climb one directory at a time until the working directory's path no longer
# contains the substring "notebooks", so later cells run from that directory
# (presumably the repository root -- confirm for your checkout).
# NOTE(review): this is a substring test on the whole path, so an ancestor
# directory whose name merely contains "notebooks" is climbed past as well.
while True:
    if "notebooks" not in os.getcwd():
        break
    os.chdir("..")

In [2]:
from datasets import load_dataset
import numpy as np
from torch import argmax
from torch.nn import Softmax

from belt_nlp.bert_with_pooling import BertClassifierWithPooling

  from .autonotebook import tqdm as notebook_tqdm


# Example - Model BERT with pooling

In this notebook we show how to use the basic `fit` and `predict` methods of the BERT model with pooling.

## Load data - author recognition of Guardian articles

In [3]:
# Load the "cross_topic_1" configuration of the "guardian_authorship" dataset.
dataset = load_dataset("guardian_authorship", name="cross_topic_1")

In [4]:
# Show the dataset's splits together with their features and row counts.
dataset

DatasetDict({
    train: Dataset({
        features: ['author', 'topic', 'article'],
        num_rows: 112
    })
    test: Dataset({
        features: ['author', 'topic', 'article'],
        num_rows: 207
    })
    validation: Dataset({
        features: ['author', 'topic', 'article'],
        num_rows: 62
    })
})

## Split into train and test sets

In [5]:
# Inputs are the article texts and targets are the author labels.
# Only the "train" and "test" splits are read here; the "validation" split is
# left untouched by this cell.
X_train, y_train = dataset["train"]["article"], dataset["train"]["author"]
X_test, y_test = dataset["test"]["article"], dataset["test"]["author"]

In [6]:
# Number of distinct author labels present in the training split.
# NOTE(review): presumably every test label also occurs in the training split
# and labels are encoded as 0..num_labels-1 -- confirm against the dataset.
num_labels = len({author for author in y_train})

In [7]:
# Display the number of distinct training labels.
num_labels

13

## Fit the model

In [8]:
# Hyper-parameters, forwarded as keyword arguments to BertClassifierWithPooling.
MODEL_PARAMS = dict(
    num_labels=num_labels,
    batch_size=16,
    learning_rate=5e-5,
    epochs=3,
    # Chunking of long texts. The stride equals the chunk size, so chunks
    # presumably do not overlap; 510 is presumably BERT's 512-token window
    # minus two special tokens -- confirm against the belt_nlp documentation.
    chunk_size=510,
    stride=510,
    minimal_chunk_length=510,
    # Allows texts of up to four chunk lengths.
    maximal_text_length=510 * 4,
    pooling_strategy="mean",
    # NOTE(review): hard-coded to CUDA; presumably needs at least one GPU.
    device="cuda",
    many_gpus=True,
)
model = BertClassifierWithPooling(**MODEL_PARAMS)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [9]:
# Train the classifier on the training articles and their author labels.
model.fit(X_train, y_train, epochs=3)  # A warning about tokenizing too-long text is expected

## Get predictions

In [10]:
# Raw model outputs for the test articles.
# Assumes logits has shape (n_texts, num_labels) -- TODO confirm.
logits = model.predict_logits(X_test)

# Hard predictions: index of the largest logit along dim 1, moved to the CPU
# and converted to a NumPy array for the comparison with the labels.
classes = argmax(logits, dim=1).cpu().numpy()

# Soft predictions: softmax over the same dimension.
softmax = Softmax(dim=1)
probabilities = softmax(logits)

## Calculate model accuracy on the test data

In [11]:
# Accuracy = fraction of test articles whose predicted class equals the
# reference author label.
labels = np.asarray(y_test)

# Fail loudly if predictions and labels do not line up one-to-one, rather than
# relying on NumPy's handling of mismatched shapes in the comparison below.
assert classes.shape == labels.shape, "predictions and labels must align one-to-one"

# Count the matches with a vectorized NumPy call instead of the Python builtin
# sum(), which iterates the boolean array element by element.
accurate = int(np.count_nonzero(classes == labels))
accuracy = accurate / len(labels)

print(f"Test accuracy: {accuracy}")

Test accuracy: 0.3719806763285024
