In [1]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from torch import cuda
from torch.nn import DataParallel
from tqdm import tqdm
import os

# Checkpoint under evaluation; the tokenizer and the classifier are both
# restored from this same path.
model_name = "/home/pgajo/working/pt_models/incel-bert-base-multilingual-cased-1000k_multi_finetuned1_hate_speech_metrics_id_23"
# Alternative checkpoint, kept for quick switching:
# model_name = "/home/pgajo/working/pt_models/bert-base-multilingual-cased_finetuned1_hate_speech_metrics_id_17"

model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Read the evaluation CSV and preview its first rows in the notebook.
df_test = pd.read_csv(
    '/home/pgajo/working/data/datasets/Italian/Il_forum_dei_brutti/IFD-IT-500.csv'
)
display(df_test.head(n=5))
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from torch.utils.data import DataLoader, Dataset

class TestDataset(Dataset):
    """Map-style dataset that tokenizes one text per item for classification.

    Args:
        texts: Sequence of input texts, indexable by integer position.
        labels: Sequence of integer class labels aligned with ``texts``.
        tokenizer: Callable tokenizer (e.g. a Hugging Face tokenizer) that
            accepts the keyword arguments used in ``__getitem__`` and returns
            a mapping holding ``input_ids`` and ``attention_mask`` tensors.
        max_length: Length every sequence is padded/truncated to. Defaults
            to 512, the value that used to be hard-coded.

    Raises:
        ValueError: If ``texts`` and ``labels`` differ in length.
    """

    def __init__(self, texts, labels, tokenizer, max_length=512):
        # Fail fast: misaligned labels would otherwise silently skew the
        # evaluation or surface later as an IndexError inside a worker.
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels must have the same length, "
                f"got {len(texts)} and {len(labels)}"
            )
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of examples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize example ``idx``.

        Returns:
            A dict with 1-D ``input_ids`` and ``attention_mask`` tensors and
            a scalar ``label`` tensor of dtype ``torch.long``.
        """
        # Calling the tokenizer directly is the current API; ``encode_plus``
        # is the legacy spelling of the same single-text call.
        encoding = self.tokenizer(
            self.texts[idx],
            add_special_tokens=True,
            max_length=self.max_length,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        # return_tensors='pt' adds a leading batch axis; flatten it away so
        # the DataLoader's default collation can stack items into a batch.
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(self.labels[idx], dtype=torch.long),
        }

def evaluate(model, data_loader, device):
    """Run inference over ``data_loader`` and score the predictions.

    Args:
        model: Sequence-classification model whose output exposes
            ``.logits``; it is switched to eval mode.
        data_loader: Iterable of batches, each a dict with ``input_ids``,
            ``attention_mask`` and ``label`` tensors.
        device: Device the input tensors are moved to before the forward
            pass.

    Returns:
        A dict with ``accuracy``, ``precision``, ``recall`` and ``f1``.
        Precision, recall and F1 use ``average='binary'``, i.e. they are
        computed for the positive class of a two-class problem.
    """
    model.eval()
    predictions, true_labels = [], []

    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            # Labels are intentionally not passed to the model: the loss is
            # never used here, and under DataParallel it would also force a
            # gather of per-replica scalar losses on every batch.
            outputs = model(input_ids, attention_mask=attention_mask)
            preds = outputs.logits.argmax(dim=1)

            predictions.extend(preds.tolist())
            # Labels are only needed as Python ints for the metrics, so they
            # are read directly instead of being moved to the device first.
            true_labels.extend(batch['label'].tolist())

    accuracy = accuracy_score(true_labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(
        true_labels, predictions, average='binary'
    )

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if cuda.is_available() else "cpu")
model.to(device)
if cuda.device_count() > 1:
    # More than one GPU visible: wrap the model in DataParallel.
    model = DataParallel(model)

# Text comes from the 'text' column; the 'hs' column is used as the label.
texts, labels = df_test['text'].tolist(), df_test['hs'].tolist()
test_data = TestDataset(texts, labels, tokenizer)

# shuffle=False keeps the batches in the CSV's row order.
test_data_loader = DataLoader(
    test_data, batch_size=16, shuffle=False, num_workers=4, pin_memory=True
)

results = evaluate(model, test_data_loader, device)
print("Results: ", results)


2023-04-19 17:09:59.103241: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-19 17:09:59.212126: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-04-19 17:09:59.212147: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-04-19 17:09:59.734359: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directo

Unnamed: 0.1,row_id,post_thread_num,user,hs,misogynous,racist,multi,text,quote_text,data_type,Unnamed: 0
0,127091,48,StronzOne,1,1,0,1,"Ma certo, sempre mettere gli addominali. Quand...","CITAZIONE (Clochard Schizoide @ 9/11/2021, 11:...",test,
1,16323,34,R o s s o,1,1,1,3,"Compagno le n3gr3 sono oggettivamente brutte, ...","CITAZIONE (SigmaSuke @ 22/12/2021, 18:07) Non ...",test,
2,196842,3,borndied,1,1,0,1,Quoto.. è ridicola Anch’io se fossi un 7 mi an...,"CITAZIONE (bruttomanonpiaccio @ 23/8/2022, 01:...",test,
3,29083,16,Uranio la Merda,1,1,0,1,"in spiaggia 6 anni fa, ero a una festa, con op...",,test,
4,80581,16,Principe Kekistano,1,1,0,1,sarebbero solo da scopare e buttare nel cesso ...,"CITAZIONE (Monkifrog @ 28/7/2021, 11:49) Sono ...",test,


Evaluating: 100%|██████████| 32/32 [00:12<00:00,  2.51it/s]

Results:  {'accuracy': 0.7, 'precision': 0.5880281690140845, 'recall': 0.835, 'f1': 0.6900826446280992}



