In [1]:
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments
import torch

import random
import numpy as np
from sklearn.metrics import classification_report



In [2]:
from kaggle_secrets import UserSecretsClient

# Identifier of the checkpoint handed to `from_pretrained` further down.
MODEL_PATH = "ssary/XLM-RoBERTa-German-sentiment"

# Pull the Hugging Face token out of the Kaggle secret named "HuggingFace_write",
# so the credential itself never appears in the notebook source.
user_secrets = UserSecretsClient()
api_token = user_secrets.get_secret("HuggingFace_write")

## Data

In [3]:
class MyDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with their labels.

    ``encodings`` is a mapping from field name to an indexable collection with
    one entry per sample; ``labels`` is an indexable collection of labels.
    Indexing returns a dict holding one tensor per encoding field plus a
    ``'labels'`` tensor.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        # The dataset size is defined by the labels, not by the encodings.
        return len(self.labels)

    def __getitem__(self, idx):
        sample = {}
        for field, column in self.encodings.items():
            sample[field] = torch.tensor(column[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

In [4]:
def test_classification_report(model, test_text, test_labels):
    """Evaluate ``model`` on a labelled test set and print a per-class report.

    The texts are tokenized with the notebook-level ``tokenizer`` (truncated to
    128 tokens, padded), run through the model in batches of 16 without
    gradient tracking, and the argmax predictions are compared with
    ``test_labels`` via ``sklearn.metrics.classification_report``.

    Args:
        model: Sequence-classification model whose output exposes ``.logits``.
            It is switched to eval mode and moved to the GPU when one is
            available; both changes persist after the call.
        test_text: List of input strings.
        test_labels: Integer class ids aligned with ``test_text``; ids 0, 1 and
            2 are reported as Negative, Neutral and Positive.

    Raises:
        ValueError: If ``test_text`` and ``test_labels`` differ in length.

    Returns:
        None. The report is printed.
    """
    # The dataset length is taken from the labels, so a mismatch would either
    # silently drop texts or fail with an IndexError in the middle of the run.
    if len(test_text) != len(test_labels):
        raise ValueError(
            f"test_text has {len(test_text)} items but test_labels has {len(test_labels)}"
        )

    class_names = ['Negative', 'Neutral', 'Positive']

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    test_encodings = tokenizer(test_text, max_length=128, truncation=True, padding=True)
    test_dataset = MyDataset(test_encodings, test_labels)
    data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=16, shuffle=False)

    predictions = []
    real_labels = []
    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            logits = model(input_ids, attention_mask=attention_mask).logits
            predictions.extend(torch.argmax(logits, dim=1).cpu().tolist())
            # Labels never feed the model, so they are read straight from the
            # CPU batch instead of being copied to the device and back.
            real_labels.extend(batch['labels'].tolist())

    # Pin the label ids explicitly: without `labels`, classification_report
    # raises when a class is absent from both the truth and the predictions,
    # because the number of observed classes no longer matches target_names.
    report = classification_report(
        real_labels,
        predictions,
        labels=list(range(len(class_names))),
        target_names=class_names,
    )
    print(report)

Load the GermEval and SB10K test sets

In [5]:
# Read one test sentence per line. Each open() is wrapped in a `with` block so the
# file handle is closed as soon as the text is loaded instead of being leaked.
# rstrip('\n') removes trailing newlines so split('\n') yields no empty final entry.
# NOTE(review): latin-1 decodes any byte sequence without error, so if these files
# are actually UTF-8 the non-ASCII characters are silently mis-decoded -- confirm.
with open("/kaggle/input/fine-tuning-sentiment-analysis-data/test_text.txt", encoding='latin-1') as file:
    test_text_germeval = file.read().rstrip('\n').split('\n')
with open("/kaggle/input/sb-10k-german-dataset/test_text.txt", encoding='latin-1') as file:
    test_text_sb10k = file.read().rstrip('\n').split('\n')

In [6]:
# Each label file is parsed as one integer class id per line; surrounding
# whitespace (including the line break) is stripped before conversion.
with open("/kaggle/input/fine-tuning-sentiment-analysis-data/test_labels.txt", encoding='latin-1') as file:
    test_labels_germeval = [int(label) for label in map(str.strip, file)]

with open("/kaggle/input/sb-10k-german-dataset/test_labels.txt", encoding='latin-1') as file:
    test_labels_sb10k = [int(label) for label in map(str.strip, file)]

In [7]:
# Download the classifier and its tokenizer from the same checkpoint,
# authenticating with the token read from the Kaggle secret store.
# NOTE(review): recent transformers releases deprecate `use_auth_token` in favour
# of `token` -- confirm the installed version before switching.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_PATH,
    use_auth_token=api_token,
)
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_PATH,
    use_auth_token=api_token,
    use_fast=True,
)



config.json:   0%|          | 0.00/992 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]



sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Test the 200K model on the GermEval dataset

In [8]:
# Print per-class precision/recall/F1 for the GermEval test split.
test_classification_report(
    model=model,
    test_text=test_text_germeval,
    test_labels=test_labels_germeval,
)

              precision    recall  f1-score   support

    Negative       0.76      0.90      0.82    238676
     Neutral       0.56      0.99      0.72     41398
    Positive       0.97      0.85      0.90    579501

    accuracy                           0.87    859575
   macro avg       0.77      0.91      0.82    859575
weighted avg       0.89      0.87      0.87    859575



Test the 200K model on the SB10K dataset

In [9]:
# Print per-class precision/recall/F1 for the SB10K test split.
test_classification_report(
    model=model,
    test_text=test_text_sb10k,
    test_labels=test_labels_sb10k,
)

              precision    recall  f1-score   support

    Negative       0.60      0.74      0.66       290
     Neutral       0.54      0.74      0.62       290
    Positive       0.82      0.32      0.46       290

    accuracy                           0.60       870
   macro avg       0.65      0.60      0.58       870
weighted avg       0.65      0.60      0.58       870

