In [1]:
import os
import pandas as pd
import numpy as np
import torch
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments
)
from sklearn.metrics import accuracy_score, f1_score
from torch.nn.functional import softmax


  from .autonotebook import tqdm as notebook_tqdm
  from scipy.stats import pearsonr, spearmanr


In [None]:
# Location of the sentence-level CSV; must provide at least the `text` and
# `has_bias` columns used in this cell.
DATA_PATH = "bias_with_discourse_context.csv"
df = pd.read_csv(DATA_PATH)

# Drop missing or empty sentences.
# `dropna` only removes NaN, so empty / whitespace-only strings are filtered
# explicitly; `.copy()` gives an independent frame so the column assignment
# below never writes into a view of the filtered data.
df = df.dropna(subset=["text"])
df = df[df["text"].astype(str).str.strip().ne("")].copy()

# Integer class label consumed by the classifier (cast of `has_bias`).
df["label"] = df["has_bias"].astype(int)


In [3]:
def _build_input_text(frame):
    """Concatenate discourse context and the sentence into one model input string per row.

    Missing discourse roles are rendered as "NONE", a missing article stance
    as "UNKNOWN", and a missing sentence as the empty string.
    """
    role = frame["discourse_role"].fillna("NONE")
    prev_role = frame["prev_discourse_role"].fillna("NONE")
    next_role = frame["next_discourse_role"].fillna("NONE")
    stance = frame["article_bias"].fillna("UNKNOWN")
    sentence = frame["text"].fillna("")

    context = "Discourse: " + role + " | Prev: " + prev_role + " | Next: " + next_role
    return context + " | Article stance: " + stance + " | Sentence: " + sentence


df["input_text"] = _build_input_text(df)

# Quick look at the identifying columns and the label.
print("ðŸ“Š Dataset preview:")
preview_columns = ["article_id", "article_bias", "discourse_role", "label"]
print(df[preview_columns].head())


ðŸ“Š Dataset preview:
                             article_id article_bias      discourse_role  \
0  d2e17368-b2da-4ee4-a7d9-8a5cccdebefd       center       Cause_General   
1  d2e17368-b2da-4ee4-a7d9-8a5cccdebefd       center                Main   
2  d2e17368-b2da-4ee4-a7d9-8a5cccdebefd       center  Distant_Evaluation   
3  d2e17368-b2da-4ee4-a7d9-8a5cccdebefd       center              Speech   
4  d2e17368-b2da-4ee4-a7d9-8a5cccdebefd       center  Distant_Evaluation   

   label  
0      1  
1      0  
2      1  
3      0  
4      0  


In [4]:
dataset = Dataset.from_pandas(df[["article_id", "input_text", "label"]])

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def tokenize(batch):
    """Tokenize a batch of `input_text` strings, truncating/padding each to 128 tokens."""
    return tokenizer(batch["input_text"], truncation=True, padding="max_length", max_length=128)

dataset = dataset.map(tokenize, batched=True)

# Split 80/10/10 *by article*.
# A random sentence-level split puts sentences of the same article into train,
# validation and test at once; because the inputs carry article-level context
# (stance, neighbouring discourse roles) that leaks information and inflates
# the validation/test scores. Whole articles are therefore assigned to exactly
# one split.
SPLIT_SEED = 42
article_col = df["article_id"]
unique_articles = article_col.drop_duplicates().to_numpy()
article_order = np.random.default_rng(SPLIT_SEED).permutation(len(unique_articles))
shuffled_articles = unique_articles[article_order]

n_train = int(round(0.8 * len(shuffled_articles)))
n_val = int(round(0.1 * len(shuffled_articles)))

def _select_articles(articles):
    """Return the rows of `dataset` whose article_id is one of `articles`.

    Relies on `dataset` having been built from `df` in the same row order, so
    positional indices computed on `df` address the same rows in `dataset`.
    """
    row_idx = np.flatnonzero(article_col.isin(articles).to_numpy())
    return dataset.select(row_idx.tolist())

train_ds = _select_articles(shuffled_articles[:n_train])
val_ds = _select_articles(shuffled_articles[n_train:n_train + n_val])
test_ds = _select_articles(shuffled_articles[n_train + n_val:])

# Every sentence must land in exactly one split.
assert len(train_ds) + len(val_ds) + len(test_ds) == len(dataset)


Map: 100%|██████████| 8846/8846 [00:01<00:00, 7927.00 examples/s] 


In [5]:
# BERT encoder with a two-way classification head (the two `label` classes).
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
def compute_metrics(eval_pred):
    """Compute accuracy and F1 from a `(logits, labels)` evaluation pair.

    The predicted class of each example is the arg-max over the last axis of
    the logits. Returns a dict with the keys "accuracy" and "f1".
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=-1)
    accuracy = accuracy_score(gold, predicted)
    f1 = f1_score(gold, predicted)
    return dict(accuracy=accuracy, f1=f1)


In [7]:
# Fine-tuning configuration: evaluate and checkpoint once per epoch, then
# restore the checkpoint with the best validation F1 when training ends.
training_args = TrainingArguments(
    # where artefacts go
    output_dir="./checkpoints/sentence_bias",
    logging_dir="./logs",
    report_to="none",
    # optimisation schedule
    learning_rate=2e-5,
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # evaluation / checkpoint selection
    evaluation_strategy="epoch",
    save_strategy="epoch",
    metric_for_best_model="f1",
    load_best_model_at_end=True,
)

# Trainer wiring: training split for optimisation, validation split for the
# per-epoch metrics computed by `compute_metrics`.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    train_dataset=train_ds,
    eval_dataset=val_ds,
)


In [8]:
# Launch fine-tuning with the Trainer configured in the previous cell.
trainer.train()


 14%|█▎        | 181/1329 [20:03<1:39:14,  5.19s/it] 

KeyboardInterrupt: 

In [None]:
# Evaluate on the held-out test split and print the resulting metric dict.
metrics = trainer.evaluate(eval_dataset=test_ds)
print("ðŸ“ˆ Test metrics:", metrics)


In [None]:
def _predict_bias_probs(texts, clf, tok, batch_size=32, max_length=128):
    """Return P(label == 1) for every string in `texts`.

    Args:
        texts: list of input strings (the `input_text` format used for training).
        clf: sequence-classification model whose output exposes `.logits`.
        tok: tokenizer matching `clf`.
        batch_size: number of texts per forward pass.
        max_length: truncation length in tokens.

    Returns:
        A list of floats, one per input text, in input order.
    """
    # The Trainer may have moved the model to an accelerator; inputs must live
    # on the same device or the forward pass raises a device-mismatch error.
    device = next(clf.parameters()).device
    clf.eval()

    probs = []
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            chunk = texts[start:start + batch_size]
            # Pad only to the longest sequence in the batch; the attention
            # mask makes the padding irrelevant to the prediction.
            encoded = tok(
                chunk,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=max_length,
            )
            encoded = {key: value.to(device) for key, value in encoded.items()}
            logits = clf(**encoded).logits
            # Column 1 of the softmax is the probability of the "biased" class.
            probs.extend(softmax(logits, dim=-1)[:, 1].tolist())
    return probs


bias_probs = _predict_bias_probs(df["input_text"].tolist(), model, tokenizer)

df["bias_prob"] = bias_probs


In [None]:
# Article score = mean sentence-level bias probability of its sentences.
article_scores = (
    df.groupby("article_id")["bias_prob"]
    .mean()
    .reset_index()
)

# An article is called "biased" when its mean probability exceeds 0.55,
# otherwise "neutral".
article_scores["predicted_article_bias"] = (
    article_scores["bias_prob"]
    .gt(0.55)
    .map({True: "biased", False: "neutral"})
)

print("\nðŸ§¾ Article-level results:")
print(article_scores.head())

# Persist both granularities without the pandas index column.
df.to_csv("./sentence_bias_predictions.csv", index=False)
article_scores.to_csv("./article_bias_predictions.csv", index=False)

print("\nâœ… Sentence-level and article-level predictions saved.")