In [1]:
import numpy as np
import torch
from datasets import DatasetDict
from datasets import load_metric
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer


In [2]:
dataset = DatasetDict.load_from_disk('/home/pavel/work/active_learning_project/exploded_dataset')

In [3]:
f1 = load_metric('f1')


def compute_metrics(eval_preds):
    metric = load_metric('f1')
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels, average='weighted')

In [4]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

In [5]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [6]:
model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=5)
model.to(device);

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [7]:
import torch
from sklearn.metrics import f1_score
from transformers import AdamW
from transformers import get_scheduler

from scripts.utils import DialogPredictionCustomMetric
from scripts.data import WorstDialogSamplerWithRemoval


def _make_batch_data(batch, tokenizer, device,
                     **tokenizer_kwargs):
    batch_dict = {k: v for k, v in batch.items()}
    data = tokenizer(batch_dict['dialog'], **tokenizer_kwargs)
    data['labels'] = batch_dict['act']
    return {k: v.to(device) for k, v in data.items()}, batch['dialog_id']


class Trainer:

    def __init__(self, model,
                 first_epoch_dataloader,
                 train_dataloader,
                 train_sampler: WorstDialogSamplerWithRemoval,
                 eval_dataloader,
                 tokenizer, device,
                 metric,
                 metric_kwargs,
                 **kwargs):
        self.model = model
        self.first_epoch_dataloader = first_epoch_dataloader
        self.train_dataloader = train_dataloader
        self.train_sampler = train_sampler
        self.eval_dataloader = eval_dataloader
        self.tokenizer = tokenizer
        self.device = device
        self.metric = metric
        self.metric_kwargs = metric_kwargs
        self.dp = DialogPredictionCustomMetric(metric,
                                               metric_kwargs=metric_kwargs)
        self.optimizer = AdamW(model.parameters(), lr=5e-5)
        self.init(**kwargs)

    def init(self, **kwargs):
        self.lr_scheduler = get_scheduler(
            "linear",
            optimizer=self.optimizer,
            num_warmup_steps=0,
            num_training_steps=kwargs['num_training_steps']
        )

    def train(self, num_epochs):
        train_history = {'loss': [], 'metrics': []}
        for epoch in range(num_epochs):
            self.at_each_epoch_step(epoch, train_history)
            self.after_epoch_end(epoch, train_history)
            self.evaluate(epoch, train_history)

    def at_each_epoch_step(self, epoch, train_history):
        self.model.train()
        total_loss = 0
        self.train_sampler.eval(False)
        dataloader = self.train_dataloader
        if epoch == 0:
            dataloader = self.first_epoch_dataloader
        for batch in dataloader:
            data, dialog_ids = _make_batch_data(batch, self.tokenizer, self.device,
                                                truncation=True, padding=True,
                                                max_length=512,
                                                return_tensors='pt')
            outputs = self.model(**data)
            loss = outputs.loss
            total_loss += float(loss)
            loss.backward()

            self.optimizer.step()
            self.lr_scheduler.step()
            self.optimizer.zero_grad()
        print(total_loss)
        train_history['loss'].append(total_loss)

    def after_epoch_end(self, epoch_num, train_history):
        self.train_sampler.eval(True)
        self.model.eval()
        for batch in self.train_dataloader:
            data, dialog_ids = _make_batch_data(batch, self.tokenizer, self.device,
                                                truncation=True, padding=True,
                                                max_length=512,
                                                return_tensors='pt')
            outputs = self.model(**data)
            predictions = torch.argmax(outputs.logits, dim=-1)
            for i in range(len(data['labels'])):
                self.dp.add_answer(int(dialog_ids[i]), int(data['labels'][i]), int(predictions[i]))
        self.train_sampler.update_source_after_epoch()

    def evaluate(self, epoch, train_history):
        self.model.eval()
        for batch in self.eval_dataloader:
            data, dialog_ids = _make_batch_data(batch, self.tokenizer, self.device,
                                                truncation=True, padding=True,
                                                max_length=512,
                                                return_tensors='pt')
            outputs = self.model(**data)
            predictions = torch.argmax(outputs.logits, dim=-1)
            self.metric.add_batch(predictions=predictions, references=data['labels'])
        metric_value = self.metric.compute(**self.metric_kwargs)
        print(metric_value)
        train_history['metrics'].append(metric_value)

In [8]:
bottom_percents = 10
batch_size = 32
dp = DialogPredictionCustomMetric(f1_score, metric_kwargs={'average': 'weighted'})
train_worst_sampler = WorstDialogSamplerWithRemoval(dataset['train'], dp, bottom_percents)
train_dataloader = DataLoader(dataset['train'], batch_size=batch_size, sampler=train_worst_sampler)
eval_dataloader = DataLoader(dataset['validation'], batch_size=batch_size)
first_epoch_dataloader = DataLoader(dataset['test'], batch_size=batch_size)

In [9]:
trainer = Trainer(
    model,
    first_epoch_dataloader,
    train_dataloader,
    train_worst_sampler,
    eval_dataloader,
    tokenizer,
    device,
    f1,
    {'average': 'weighted'},
    num_training_steps=500
)

In [10]:
trainer.train(2)

149.63257610052824
[]
{'f1': 0.7210767409604459}
0
[]
{'f1': 0.7210767409604459}


In [1]:
dp=DialogPredictionCustomMetric(f1_score, {'average': 'weighted'})

NameError: name 'DialogPredictionCustomMetric' is not defined