# Importing the Debiased Sentences

In [11]:
from google.colab import drive
# Mount Google Drive at /content/drive; the project files read and written
# further down all live under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
import pandas as pd

# CSV holding each original sentence next to its debiased rewrite.
debiased_df_path = "/content/drive/MyDrive/266_Project/DBIAS_Results.csv"
# NOTE(review): the loaded frame carries an "Unnamed: 0" column (see the preview
# output) — presumably an index written out by an earlier to_csv; confirm.
debiased_df = pd.read_csv(debiased_df_path)

In [13]:
# Preview the first rows to confirm which columns were loaded.
debiased_df.head()

Unnamed: 0,Original Text,Debiased Text
0,"Indeed, that original NYT report suggested tha...","Indeed, that original NYT report suggested tha..."
1,How we treat people at or inside our border ce...,How we treat people at or inside our border ce...
2,The battle of Portland is just plain crazy — b...,The battle Portland is just plain — but what...
3,"As a perennial litigant, Trump weaponized the ...","As litigant, law devastate perceived ene..."
4,The president insisted his Department of Healt...,The president insisted Department Health and...


## Evaluate Debiasing Performance

### LLaMA Treatment Model

#### 1. Using the RoBERTa classifier

Import the RoBERTa classifier

In [14]:
# Blank out missing values before casting. astype(str) on its own converts NaN
# into the literal text "nan", which every classifier below would then score as
# if it were a real sentence; an empty string is recognisable as "no text".
debiased_df['Original Text'] = debiased_df['Original Text'].fillna("").astype(str)
debiased_df['Debiased Text'] = debiased_df['Debiased Text'].fillna("").astype(str)

In [15]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification

# Drive directory holding the saved RoBERTa checkpoint (fine-tuned, per its name).
saved_model_dir = "/content/drive/MyDrive/266_Project/3.2_RoBERTa_Treatment/roberta_fine_tuned_model"

# Tokenizer and classification model are both restored from that same directory.
roberta_tokenizer = RobertaTokenizer.from_pretrained(saved_model_dir)

roberta_model = RobertaForSequenceClassification.from_pretrained(saved_model_dir)

In [16]:
import torch

def classify_sentence(sentence, model, tokenizer, device="cuda:0"):
    """Return the predicted class index for a single sentence.

    Args:
        sentence: Text to classify.
        model: Sequence-classification model; it is moved to ``device`` and its
            output must expose ``.logits``.
        tokenizer: Callable that encodes ``sentence`` into a mapping of
            PyTorch tensors.
        device: Device to run on. A CUDA device is replaced by ``"cpu"`` when
            CUDA is not available, so the default also works on CPU-only
            runtimes.

    Returns:
        int: Index of the largest logit.
    """
    # Fall back rather than crash when a CUDA device is requested (including
    # through the default argument) on a runtime without a GPU.
    if str(device).startswith("cuda") and not torch.cuda.is_available():
        device = "cpu"

    model.to(device)
    encoded = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True)
    # The inputs have to live on the same device as the model.
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Inference only: no gradients needed.
    with torch.no_grad():
        outputs = model(**encoded)

    return torch.argmax(outputs.logits, dim=-1).item()

# Class ids produced by the classifier and the leaning each one stands for.
label_mapping = {0: "right", 1: "left", 2: "center"}

# Run on the GPU when one is present, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Classify every debiased sentence, then attach the readable label.
debiased_df['roberta_debiased_prediction'] = debiased_df['Debiased Text'].apply(
    lambda text: classify_sentence(text, roberta_model, roberta_tokenizer, device=device)
)
debiased_df['roberta_debiased_label'] = debiased_df['roberta_debiased_prediction'].map(label_mapping)

Check how many sentences are classified as "center", which would indicate that the LLaMA model successfully neutralized the political bias.

In [17]:
# Share of debiased sentences whose predicted label is "center"
test_center_ratio = debiased_df['roberta_debiased_label'].eq('center').mean()
print(f"Proportion of 'center' classifications in debiased test set: {test_center_ratio:.2%}")
# Show the leading rows with only the text and label columns.
debiased_df.head()[['Original Text', 'Debiased Text', 'roberta_debiased_label']]

Proportion of 'center' classifications in debiased test set: 14.00%


Unnamed: 0,Original Text,Debiased Text,roberta_debiased_label
0,"Indeed, that original NYT report suggested tha...","Indeed, that original NYT report suggested tha...",left
1,How we treat people at or inside our border ce...,How we treat people at or inside our border ce...,right
2,The battle of Portland is just plain crazy — b...,The battle Portland is just plain — but what...,left
3,"As a perennial litigant, Trump weaponized the ...","As litigant, law devastate perceived ene...",left
4,The president insisted his Department of Healt...,The president insisted Department Health and...,left


In [19]:
# Same preview again, now including the numeric prediction column.
debiased_df.head()

Unnamed: 0,Original Text,Debiased Text,roberta_debiased_prediction,roberta_debiased_label
0,"Indeed, that original NYT report suggested tha...","Indeed, that original NYT report suggested tha...",1,left
1,How we treat people at or inside our border ce...,How we treat people at or inside our border ce...,0,right
2,The battle of Portland is just plain crazy — b...,The battle Portland is just plain — but what...,1,left
3,"As a perennial litigant, Trump weaponized the ...","As litigant, law devastate perceived ene...",1,left
4,The president insisted his Department of Healt...,The president insisted Department Health and...,1,left


Evaluate Performance Metrics

In [20]:
from sklearn.metrics import accuracy_score
# The reference label for every row is class 2, so this accuracy is the
# fraction of predictions equal to 2.
true_labels = [2 for _ in range(len(debiased_df))]
test_accuracy = accuracy_score(
    y_true=true_labels,
    y_pred=debiased_df['roberta_debiased_prediction'],
)
print(f"Accuracy: {test_accuracy:.2f}")

Accuracy: 0.14


#### 2. Sentiment Check

In [22]:
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
import urllib.request
import csv

task = "sentiment"
MODEL = f"cardiffnlp/twitter-roberta-base-{task}"

# Tokenizer and classifier are both pulled from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

# Download the label table: tab-separated rows, of which the second field is
# kept as the label name. Rows with fewer than two fields are skipped.
labels = []
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
with urllib.request.urlopen(mapping_link) as f:
    html = f.read().decode("utf-8").split("\n")
# The response is fully read above, so parsing can happen after the connection closes.
csvreader = csv.reader(html, delimiter="\t")
labels = [fields[1] for fields in csvreader if len(fields) >= 2]

def classify_sentiment(text, max_length=512):
    """Return the top sentiment label for ``text`` together with its probability.

    Reads the notebook-level ``tokenizer``, ``model`` and ``labels``.

    Args:
        text: Sentence to score.
        max_length: Maximum number of tokens kept when encoding. The default
            of 512 is assumed to match the checkpoint's usable sequence
            length — TODO confirm against the model config.

    Returns:
        tuple: ``(label, score)`` — the entry of ``labels`` with the highest
        softmax probability, and that probability.
    """
    # Truncate explicitly; otherwise an over-long input is handed to the model
    # unshortened.
    encoded_input = tokenizer(
        text, return_tensors="pt", truncation=True, max_length=max_length
    )
    # Inference only, so do not record operations for autograd.
    with torch.no_grad():
        output = model(**encoded_input)
    scores = output[0][0].detach().numpy()
    scores = softmax(scores)

    # label with the highest score
    ranking = np.argsort(scores)[::-1]
    top_label = labels[ranking[0]]
    top_score = scores[ranking[0]]
    return top_label, top_score

# Score the sentiment of both text columns; each call yields a (label, score)
# pair that is spread over two new columns.
for source_column, label_column, score_column in (
    ("Original Text", "original_sentiment", "original_sentiment_score"),
    ("Debiased Text", "debiased_sentiment", "debiased_sentiment_score"),
):
    debiased_df[[label_column, score_column]] = debiased_df[source_column].apply(
        lambda text: pd.Series(classify_sentiment(text))
    )

In [23]:
# How often each sentiment label occurs before and after debiasing
original_sentiment_counts = debiased_df["original_sentiment"].value_counts()
debiased_sentiment_counts = debiased_df["debiased_sentiment"].value_counts()

print("Sentiment Distribution Before Debiasing:", original_sentiment_counts, sep="\n")
print("\nSentiment Distribution After Debiasing:", debiased_sentiment_counts, sep="\n")

# Average of the per-row scores. Each score is the probability of whichever
# label won for that row, so this tracks classifier confidence rather than
# how positive or negative the text is.
mean_original_score, mean_debiased_score = (
    debiased_df[score_column].mean()
    for score_column in ("original_sentiment_score", "debiased_sentiment_score")
)

print(f"\nMean Sentiment Score Before Debiasing: {mean_original_score:.4f}")
print(f"Mean Sentiment Score After Debiasing: {mean_debiased_score:.4f}")

Sentiment Distribution Before Debiasing:
original_sentiment
negative    59
neutral     38
positive     3
Name: count, dtype: int64

Sentiment Distribution After Debiasing:
debiased_sentiment
negative    51
neutral     46
positive     3
Name: count, dtype: int64

Mean Sentiment Score Before Debiasing: 0.6974
Mean Sentiment Score After Debiasing: 0.6679


#### 3. D4 Data Bias Detection Model

In [24]:
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification, pipeline

# Keep the bias-detection objects under their own names. Rebinding the
# notebook-wide `tokenizer` / `model` here would silently re-point
# classify_sentiment (which reads those globals) at this checkpoint if it were
# called again after this cell.
bias_tokenizer = AutoTokenizer.from_pretrained("d4data/bias-detection-model")
bias_model = TFAutoModelForSequenceClassification.from_pretrained("d4data/bias-detection-model")
bias_classifier = pipeline('text-classification', model=bias_model, tokenizer=bias_tokenizer)

tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/657 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tf_model.h5:   0%|          | 0.00/268M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFDistilBertForSequenceClassification.

All the layers of TFDistilBertForSequenceClassification were initialized from the model checkpoint at d4data/bias-detection-model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [25]:
def classify_bias(sentence, max_length=512):
    """Classify one sentence with the notebook-level ``bias_classifier`` pipeline.

    Args:
        sentence: Text to classify.
        max_length: Maximum number of tokens passed to the model; longer
            inputs are truncated. The default of 512 is assumed to match the
            checkpoint's position limit — TODO confirm against the model config.

    Returns:
        tuple: ``(label, score)`` of the pipeline's first result, or
        ``("Invalid", 0.0)`` when ``sentence`` is not a string or contains
        only whitespace.
    """
    # Guard first: non-strings and blank strings never reach the pipeline.
    if not isinstance(sentence, str) or not sentence.strip():
        return "Invalid", 0.0

    # Truncate explicitly so an over-long sentence is shortened instead of
    # being passed to the model at full length.
    result = bias_classifier(sentence, truncation=True, max_length=max_length)
    top_result = result[0]
    return top_result['label'], top_result['score']

# Run the bias classifier over both text columns; each call yields a
# (label, score) pair that is spread over two new columns.
for source_column, label_column, score_column in (
    ('Original Text', 'original_bias_label', 'original_bias_score'),
    ('Debiased Text', 'debiased_bias_label', 'debiased_bias_score'),
):
    debiased_df[[label_column, score_column]] = debiased_df[source_column].apply(
        lambda text: pd.Series(classify_bias(text))
    )

debiased_df.head()

Unnamed: 0,Original Text,Debiased Text,roberta_debiased_prediction,roberta_debiased_label,original_sentiment,original_sentiment_score,debiased_sentiment,debiased_sentiment_score,original_bias_label,original_bias_score,debiased_bias_label,debiased_bias_score
0,"Indeed, that original NYT report suggested tha...","Indeed, that original NYT report suggested tha...",1,left,negative,0.599804,negative,0.514264,Biased,0.616854,Biased,0.632486
1,How we treat people at or inside our border ce...,How we treat people at or inside our border ce...,0,right,neutral,0.477925,neutral,0.593714,Non-biased,0.863805,Non-biased,0.840381
2,The battle of Portland is just plain crazy — b...,The battle Portland is just plain — but what...,1,left,negative,0.825357,negative,0.697986,Biased,0.977657,Biased,0.988715
3,"As a perennial litigant, Trump weaponized the ...","As litigant, law devastate perceived ene...",1,left,negative,0.673205,negative,0.56933,Biased,0.945206,Biased,0.964163
4,The president insisted his Department of Healt...,The president insisted Department Health and...,1,left,negative,0.516687,negative,0.575872,Biased,0.544243,Biased,0.625657


In [26]:
# Tally the bias labels assigned to the original and to the debiased text.
original_bias_counts = debiased_df['original_bias_label'].value_counts()
debiased_bias_counts = debiased_df['debiased_bias_label'].value_counts()

print("Original Text Bias Counts:", original_bias_counts, sep="\n")
print("\nDebiased Text Bias Counts:", debiased_bias_counts, sep="\n")

Original Text Bias Counts:
original_bias_label
Biased        57
Non-biased    43
Name: count, dtype: int64

Debiased Text Bias Counts:
debiased_bias_label
Biased        62
Non-biased    38
Name: count, dtype: int64


In [27]:
# Write the frame, including every column added above, to a new CSV on Drive.
output_path = "/content/drive/MyDrive/266_Project/DBIAS_Results_with_performance.csv"
# index=False keeps the DataFrame index out of the file.
debiased_df.to_csv(output_path, index=False)