# Setup

In [1]:
# Imports
from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    BertTokenizer,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer,
)
import evaluate
import numpy as np

In [2]:
import sys; print(sys.executable)

C:\Users\Rasmus\anaconda3\envs\AML4NLP\python.exe


In [3]:
# Load dataset
model_name = "bert-base-cased"
dataset = load_dataset("stanfordnlp/imdb")

train_dataset = dataset["train"].shuffle(seed=42).select(range(100))      
test_dataset = dataset["test"]          

print(train_dataset)
print(test_dataset)

Dataset({
    features: ['text', 'label'],
    num_rows: 100
})
Dataset({
    features: ['text', 'label'],
    num_rows: 25000
})


In [4]:
# Load tokenizer and model
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {"NEGATIVE": 0, "POSITIVE": 1}

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
    id2label=id2label,
    label2id=label2id,
)
tokenizer = BertTokenizer.from_pretrained(model_name)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
def preprocess_datasets(examples):
    return tokenizer(
        examples["text"],
        truncation=True,
        max_length=256
    )

In [6]:
# Encode splits and remove column "text"

encoded_train = train_dataset.map(preprocess_datasets, batched=True)
encoded_test = test_dataset.map(preprocess_datasets, batched=True)
encoded_train = encoded_train.remove_columns(["text"])
encoded_test = encoded_test.remove_columns(["text"])

In [7]:
encoded_train = encoded_train.with_format("torch")
encoded_test = encoded_test.with_format("torch")

In [8]:
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Metrics
accuracy = evaluate.load("accuracy")
f1 = evaluate.load("f1")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    return {
        "accuracy": accuracy.compute(predictions=preds, references=labels)["accuracy"],
        "f1": f1.compute(predictions=preds, references=labels, average="weighted")["f1"],
    }


In [9]:
training_args = TrainingArguments(
    output_dir="./bert_cased_output",
    eval_strategy="no",       # no eval during training
    save_strategy="no",             # don't save checkpoints each epoch
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01,
    logging_steps=100,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_train, 
    # no eval_dataset for now
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,  
)

  trainer = Trainer(


In [10]:
trainer.train()



Step,Training Loss


TrainOutput(global_step=26, training_loss=0.6740114505474384, metrics={'train_runtime': 196.4005, 'train_samples_per_second': 1.018, 'train_steps_per_second': 0.132, 'total_flos': 26311105536000.0, 'train_loss': 0.6740114505474384, 'epoch': 2.0})

In [11]:
trainer.save_model("bert_cased")

In [12]:
test_results = trainer.evaluate(encoded_test.shuffle(seed=42).select(range(100)))
print(test_results)

{'eval_loss': 0.665762186050415, 'eval_accuracy': 0.63, 'eval_f1': 0.6228750901040058, 'eval_runtime': 21.5433, 'eval_samples_per_second': 4.642, 'eval_steps_per_second': 0.325, 'epoch': 2.0}
