In [None]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from sklearn.metrics import accuracy_score

In [None]:
# Load the preprocessed articles; the "content" column holds the text to classify.
df = pd.read_csv("../datasets/preprocessed/webis_news_bias_20.csv")

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
model = AutoModelForSequenceClassification.from_pretrained("bucketresearch/politicalBiasBERT")

# Longest token sequence the model's position embeddings cover. Without truncation,
# longer articles make the forward pass raise RuntimeError, so they would be silently
# recorded as None and excluded from the evaluation.
max_tokens = model.config.max_position_embeddings

# One entry per row of df, in row order: the predicted class id, or None when the
# row could not be classified. The evaluation cell relies on this positional alignment.
predictions = []

for text in df["content"]:
    # Missing or non-string content (e.g. NaN from an empty CSV field) cannot be
    # tokenized; record it as a failed row instead of aborting the whole run.
    if not isinstance(text, str):
        predictions.append(None)
        continue

    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=max_tokens)
    try:
        with torch.no_grad():  # inference only, no gradients needed
            outputs = model(**inputs)
        logits = outputs.logits
        predicted_class = torch.argmax(logits, dim=-1).item()
        predictions.append(predicted_class)
    except RuntimeError:
        # Best effort: keep the slot so indices stay aligned with df.
        predictions.append(None)

# Display a compact per-class count (including failed rows) instead of the full list.
pd.Series(predictions, dtype="Int64").value_counts(dropna=False)

In [None]:
# Integer ids used for the string labels in df["leaning"].
# NOTE(review): presumably this matches the model's output class order — confirm
# against the model card.
leaning_to_id = {"left": 0, "center": 1, "right": 2}

# Keep only the rows that produced a prediction (None marks a failed row),
# remembering each row's position so the true labels can be looked up by position.
kept = [(pos, pred) for pos, pred in enumerate(predictions) if pred is not None]
valid_indices = [pos for pos, _ in kept]
filtered_predictions = [pred for _, pred in kept]

true_leanings = df["leaning"].iloc[valid_indices]
filtered_true_labels = true_leanings.map(leaning_to_id).tolist()

accuracy = accuracy_score(filtered_true_labels, filtered_predictions)
accuracy