In [1]:
!pip install transformers[torch] datasets numpy scikit-learn -q

In [3]:
import numpy as np
from datasets import load_dataset
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, \
    Trainer

In [4]:
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

dataset_name = "imdb"

In [5]:
def tokenize_function(examples):
    return tokenizer(examples["text"], padding=True, truncation=True)


train_dataset, eval_dataset = load_dataset("imdb", split=["train", "test"])

train_dataset = train_dataset.shuffle(seed=42).select(
    list(range(1000))
)
eval_dataset = eval_dataset.shuffle(seed=42).select(
    list(range(1000))
)

train_dataset = train_dataset.map(tokenize_function, batched=True)
eval_dataset = eval_dataset.map(tokenize_function, batched=True)

train_dataset = train_dataset.remove_columns(["text"])
eval_dataset = eval_dataset.remove_columns(["text"])

train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
eval_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [6]:
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2).to('cuda')

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'pre_classifier.weight', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)

    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='binary')
    acc = accuracy_score(labels, predictions)

    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall,
    }

In [8]:
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=64,
    logging_steps=100,
    eval_steps=100
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics
)

In [9]:
training_args.device

device(type='cuda', index=0)

In [10]:
metrics_before_training = trainer.evaluate(eval_dataset=eval_dataset)

In [11]:
print('Metrics before training: ', metrics_before_training)

Metrics before training:  {'eval_loss': 0.6927319765090942, 'eval_accuracy': 0.497, 'eval_f1': 0.4924318869828456, 'eval_precision': 0.4850894632206759, 'eval_recall': 0.5, 'eval_runtime': 16.8159, 'eval_samples_per_second': 59.467, 'eval_steps_per_second': 0.951}


In [12]:
trainer.train()

Step,Training Loss
100,0.4988
200,0.2684
300,0.1285


TrainOutput(global_step=375, training_loss=0.25956697845458987, metrics={'train_runtime': 145.0991, 'train_samples_per_second': 20.676, 'train_steps_per_second': 2.584, 'total_flos': 397402195968000.0, 'train_loss': 0.25956697845458987, 'epoch': 3.0})

In [13]:
metrics = trainer.evaluate(eval_dataset=eval_dataset)
print(metrics)

{'eval_loss': 0.49094879627227783, 'eval_accuracy': 0.879, 'eval_f1': 0.8771573604060914, 'eval_precision': 0.869215291750503, 'eval_recall': 0.8852459016393442, 'eval_runtime': 16.8402, 'eval_samples_per_second': 59.382, 'eval_steps_per_second': 0.95, 'epoch': 3.0}
