In [1]:
from torch import nn
from transformers import AutoTokenizer, AutoModel


class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.tokenizer = AutoTokenizer.from_pretrained("allegro/herbert-base-cased")
        self.herbert = AutoModel.from_pretrained("allegro/herbert-base-cased")

        for param in self.herbert.parameters():
            param.requires_grad = False
        

        self.dropout = nn.Dropout(0.1)
        self.linear = nn.Linear(768, 1)
    
    def forward(self, embeddings):
        output = self.herbert(**embeddings)
        output = output['pooler_output']
        output = self.dropout(output)
        output = self.linear(output)
        return nn.Sigmoid()(output)
    

In [2]:
import torch
from torch.utils.data import Dataset, DataLoader

class PolishHateSpeechDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, index):
        text = self.texts.loc[index].values[0]
        label = self.labels.loc[index].values[0]

        embbedings = self.tokenizer(
            text,
            padding='max_length',
            add_special_tokens=True,
            return_tensors="pt"
        )
        
        label = torch.tensor(label, dtype=torch.float)


        return embbedings, label


In [3]:
from utils import load_data
from torch.utils.data import DataLoader
import torch


batch_size = 32
num_epochs = 10
lr = 2e-5


model = Model()
tokenizer = model.tokenizer

compute_loss = torch.nn.BCELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)




Some weights of the model checkpoint at allegro/herbert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.sso.sso_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.sso.sso_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
X_train, X_val, y_train, y_val = load_data()
train_dataset = PolishHateSpeechDataset(X_train, y_train, tokenizer)
train_data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = PolishHateSpeechDataset(X_val, y_val, tokenizer)
val_data_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [9]:
from tqdm import tqdm


def train(model, train_data_loader, val_data_loader, compute_loss, optimizer, num_epochs, device):
    for epoch in range(num_epochs):
        for batch_idx, (data, targets) in tqdm(enumerate(train_data_loader), f"Epoch {epoch+1}/{num_epochs}"):
            data['input_ids'] = data['input_ids'].squeeze(1).to(device)
            data['attention_mask'] = data['attention_mask'].squeeze(1).to(device)
            data['token_type_ids'] = data['token_type_ids'].squeeze(1).to(device)
            # print(data)
            # data = data.to(device)
            targets = targets.to(device)
            model.train()
            optimizer.zero_grad()
            

            
        

            outputs = model(data)

            loss = compute_loss(outputs, targets.view(-1, 1).to(torch.float32))

            loss.backward()
            optimizer.step()

            if batch_idx % 5 == 0:
                print(f"Epoch [{epoch+1}/{num_epochs}], Batch [{batch_idx+1}/{len(train_data_loader)}], Loss: {loss.item()}")

        # val_loss = evaluate(model, val_data_loader)

        # print(f"Epoch [{epoch+1}/{num_epochs}], Validation Loss: {val_loss}")

    return model

In [10]:
model = model.to(device)

In [11]:
train(model, train_data_loader, val_data_loader, compute_loss, optimizer, num_epochs, device)

Epoch 1/10: 2it [00:08,  3.51s/it]

Epoch [1/10], Batch [1/251], Loss: 0.49637818336486816


Epoch 1/10: 6it [00:23,  4.00s/it]

Epoch [1/10], Batch [6/251], Loss: 0.4804752767086029


Epoch 1/10: 11it [00:37,  3.68s/it]

Epoch [1/10], Batch [11/251], Loss: 0.5410795211791992


Epoch 1/10: 16it [00:52,  3.65s/it]

Epoch [1/10], Batch [16/251], Loss: 0.41043955087661743


Epoch 1/10: 21it [01:07,  3.71s/it]

Epoch [1/10], Batch [21/251], Loss: 0.49963027238845825


Epoch 1/10: 26it [01:23,  3.72s/it]

Epoch [1/10], Batch [26/251], Loss: 0.4645008444786072


Epoch 1/10: 32it [01:38,  2.75s/it]

Epoch [1/10], Batch [31/251], Loss: 0.502802848815918


Epoch 1/10: 36it [01:53,  3.85s/it]

Epoch [1/10], Batch [36/251], Loss: 0.36714205145835876


Epoch 1/10: 41it [02:09,  3.76s/it]

Epoch [1/10], Batch [41/251], Loss: 0.5423718690872192


Epoch 1/10: 46it [02:24,  3.75s/it]

Epoch [1/10], Batch [46/251], Loss: 0.3847930431365967


Epoch 1/10: 51it [02:39,  3.74s/it]

Epoch [1/10], Batch [51/251], Loss: 0.42122697830200195


Epoch 1/10: 56it [02:54,  3.74s/it]

Epoch [1/10], Batch [56/251], Loss: 0.44498854875564575


Epoch 1/10: 61it [03:10,  3.73s/it]

Epoch [1/10], Batch [61/251], Loss: 0.5047924518585205


Epoch 1/10: 66it [03:25,  3.75s/it]

Epoch [1/10], Batch [66/251], Loss: 0.46420818567276


Epoch 1/10: 71it [03:40,  3.76s/it]

Epoch [1/10], Batch [71/251], Loss: 0.3854503035545349


Epoch 1/10: 76it [03:56,  3.75s/it]

Epoch [1/10], Batch [76/251], Loss: 0.37230589985847473


Epoch 1/10: 81it [04:11,  3.76s/it]

Epoch [1/10], Batch [81/251], Loss: 0.3311523199081421


Epoch 1/10: 87it [04:26,  2.76s/it]

Epoch [1/10], Batch [86/251], Loss: 0.3911435604095459


Epoch 1/10: 91it [04:42,  3.87s/it]

Epoch [1/10], Batch [91/251], Loss: 0.40435153245925903


Epoch 1/10: 97it [04:57,  2.76s/it]

Epoch [1/10], Batch [96/251], Loss: 0.3183433413505554


Epoch 1/10: 102it [05:13,  2.79s/it]

Epoch [1/10], Batch [101/251], Loss: 0.3583694100379944


Epoch 1/10: 106it [05:28,  3.95s/it]

Epoch [1/10], Batch [106/251], Loss: 0.39591649174690247


Epoch 1/10: 111it [05:44,  3.83s/it]

Epoch [1/10], Batch [111/251], Loss: 0.2985747158527374


Epoch 1/10: 116it [05:59,  3.82s/it]

Epoch [1/10], Batch [116/251], Loss: 0.34218984842300415


Epoch 1/10: 121it [06:15,  3.85s/it]

Epoch [1/10], Batch [121/251], Loss: 0.4141770005226135


Epoch 1/10: 126it [06:31,  3.96s/it]

Epoch [1/10], Batch [126/251], Loss: 0.2898814082145691


Epoch 1/10: 131it [06:46,  3.82s/it]

Epoch [1/10], Batch [131/251], Loss: 0.2857588827610016


Epoch 1/10: 136it [07:02,  3.78s/it]

Epoch [1/10], Batch [136/251], Loss: 0.32786479592323303


Epoch 1/10: 141it [07:17,  3.79s/it]

Epoch [1/10], Batch [141/251], Loss: 0.3517094850540161


Epoch 1/10: 146it [07:33,  3.79s/it]

Epoch [1/10], Batch [146/251], Loss: 0.31735002994537354


Epoch 1/10: 151it [07:48,  3.80s/it]

Epoch [1/10], Batch [151/251], Loss: 0.22065448760986328


Epoch 1/10: 156it [08:04,  3.80s/it]

Epoch [1/10], Batch [156/251], Loss: 0.2131563127040863


Epoch 1/10: 161it [08:19,  3.79s/it]

Epoch [1/10], Batch [161/251], Loss: 0.26806551218032837


Epoch 1/10: 166it [08:37,  4.12s/it]

Epoch [1/10], Batch [166/251], Loss: 0.2570524215698242


Epoch 1/10: 171it [08:52,  3.85s/it]

Epoch [1/10], Batch [171/251], Loss: 0.25342661142349243


Epoch 1/10: 176it [09:08,  3.83s/it]

Epoch [1/10], Batch [176/251], Loss: 0.302185595035553


Epoch 1/10: 181it [09:23,  3.83s/it]

Epoch [1/10], Batch [181/251], Loss: 0.25534626841545105


Epoch 1/10: 186it [09:39,  3.86s/it]

Epoch [1/10], Batch [186/251], Loss: 0.4934033155441284


Epoch 1/10: 192it [09:55,  2.82s/it]

Epoch [1/10], Batch [191/251], Loss: 0.3895069360733032


Epoch 1/10: 196it [10:11,  4.05s/it]

Epoch [1/10], Batch [196/251], Loss: 0.3471619486808777


Epoch 1/10: 201it [10:27,  3.92s/it]

Epoch [1/10], Batch [201/251], Loss: 0.2879659831523895


Epoch 1/10: 206it [10:43,  3.84s/it]

Epoch [1/10], Batch [206/251], Loss: 0.3964143395423889


Epoch 1/10: 211it [10:58,  3.82s/it]

Epoch [1/10], Batch [211/251], Loss: 0.1916532963514328


Epoch 1/10: 216it [11:14,  3.83s/it]

Epoch [1/10], Batch [216/251], Loss: 0.28262948989868164


Epoch 1/10: 221it [11:29,  3.82s/it]

Epoch [1/10], Batch [221/251], Loss: 0.17160829901695251


Epoch 1/10: 223it [11:36,  3.12s/it]


KeyboardInterrupt: 

In [12]:
import torch
from sklearn.metrics import accuracy_score

def evaluate(model, data_loader, compute_loss):
    model.eval()

    total_correct = 0
    total_loss = 0.0
    total_samples = 0

    with torch.no_grad():
        for data, targets in data_loader:
            

            outputs = model(data)
            loss = compute_loss(outputs, targets)

            predicted = torch.round(outputs)
            total_correct += (predicted == targets).sum().item()

            total_loss += loss.item() * data.size(0)
            total_samples += data.size(0)

    average_loss = total_loss / total_samples
    accuracy = total_correct / total_samples

    return average_loss, accuracy


In [13]:
evaluate(model, val_data_loader, compute_loss)

ValueError: too many values to unpack (expected 2)