In [None]:
# Core libraries
import numpy as np
import pandas as pd
import os
from tqdm import tqdm  # Progress bars

# BERT Embeddings
from transformers import BertTokenizer, BertModel
import torch

# Visualization (Optional)
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE  # For embedding visualization

# Warnings (Optional)
import warnings
warnings.filterwarnings('ignore')  # Suppress non-critical alerts

In [None]:
# Single source of truth for the checkpoint name, so the tokenizer and the
# encoder below are always loaded from the same pretrained weights.
BERT_CHECKPOINT = 'bert-base-uncased'

tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
model_bert = BertModel.from_pretrained(BERT_CHECKPOINT)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [None]:
# Load two saved NumPy arrays from the Colab working directory (/content).
# NOTE(review): the file names suggest cached BERT embeddings for the train and
# test splits, but neither array is referenced again in the visible cells —
# confirm whether they are still needed or should be reused by the evaluation.
X_train_bert = np.load('/content/X_train_bert.npy')
X_test_bert = np.load('/content/X_test_bert.npy')

In [None]:
from tensorflow.keras.models import load_model

# Restore the saved Keras model from its HDF5 file; this is the `model` whose
# `.predict` is called by the prediction and evaluation cells further down.
model = load_model('/content/best_model.h5')



In [None]:
# Print the loaded model's layer-by-layer summary as a quick sanity check.
model.summary()

In [None]:
def predict_bias(text, model, tokenizer, bert_model, threshold=0.5):
    """Classify a single text as "Biased" or "Unbiased" and print its score.

    The text is tokenized (truncated to at most 512 tokens), run through
    ``bert_model`` without gradient tracking, and the token vectors of the
    last hidden layer are mean-pooled into one row that is fed to ``model``.

    Args:
        text: The string to classify.
        model: Classifier with a Keras-style ``predict(x, verbose=...)`` whose
            result holds the bias score at ``[0][0]``.
        tokenizer: Callable that turns ``text`` into PyTorch tensors accepted
            by ``bert_model`` as keyword arguments.
        bert_model: Encoder whose output exposes ``last_hidden_state``.
        threshold: Scores strictly greater than this value are labelled
            "Biased"; everything else is "Unbiased". Defaults to 0.5.

    Returns:
        The label string, "Biased" or "Unbiased".
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():  # inference only: skip building the autograd graph
        outputs = bert_model(**inputs)

    # Average over the token axis, then force a 2-D (1, features) row because
    # the classifier is called on a batch of exactly one sample.
    pooled = outputs.last_hidden_state.mean(dim=1)
    embedding = pooled.squeeze().numpy().reshape(1, -1)

    # verbose=0 keeps the per-call progress bar out of the cell output.
    prediction = model.predict(embedding, verbose=0)[0][0]
    label = "Biased" if prediction > threshold else "Unbiased"
    print(f"Prediction Score: {prediction:.4f} → {label}")
    return label


In [None]:
# Hand-written sample paragraphs for a quick qualitative check of the classifier.
# Each list holds one multi-line string; the list name records the label the
# author expects for it.

# Neutral, announcement-style reporting.
unbiased_texts = ["""The Ministry of Health announced the launch of a new vaccination campaign aimed at preventing seasonal influenza.
The program will begin next Monday and is expected to cover over 5 million people across the country.
Health officials recommend that elderly individuals and those with underlying conditions get vaccinated as early as possible.
The campaign will be supported by mobile clinics and local health centers."""]

# Opinionated, loaded wording.
biased_texts = ["""Once again, the government has proven it cares more about big corporations than ordinary citizens.
The recent tax cuts benefit the wealthy elite while leaving the working class to struggle.
Experts warn that this move could widen the already massive inequality gap in the country.
It’s clear that those in power are out of touch with the real needs of the people."""]

# Reports both supporters' and critics' positions.
mixed_texts = ["""The city council passed a controversial housing policy that sparked debate among local communities.
Supporters argue it will create affordable homes, while critics claim it favors developers.
The policy includes tax incentives, which some say could lead to budget shortfalls in other areas.
Officials insist the plan balances growth with social responsibility."""]

In [None]:
# Score every hand-written sample, echoing each text right before its verdict.
sample_texts = [*unbiased_texts, *biased_texts, *mixed_texts]
for sample in sample_texts:
    print(sample)
    predict_bias(sample, model, tokenizer, model_bert)

The Ministry of Health announced the launch of a new vaccination campaign aimed at preventing seasonal influenza.
The program will begin next Monday and is expected to cover over 5 million people across the country.
Health officials recommend that elderly individuals and those with underlying conditions get vaccinated as early as possible.
The campaign will be supported by mobile clinics and local health centers.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 215ms/step
Prediction Score: 0.0178 → Unbiased
Once again, the government has proven it cares more about big corporations than ordinary citizens.
The recent tax cuts benefit the wealthy elite while leaving the working class to struggle.
Experts warn that this move could widen the already massive inequality gap in the country.
It’s clear that those in power are out of touch with the real needs of the people.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
Prediction Score: 0.7499 → Biased
The 

In [None]:
# Load the labelled sentence dataset from the Colab working directory.
# NOTE(review): the preview shows 'Unnamed: 0.1' and 'Unnamed: 0' columns, which
# looks like a saved index written out twice — presumably droppable; confirm.
df = pd.read_excel('/content/labeled_dataset.xlsx')
# Preview the first five rows to check the columns and label values.
df.head(5)

Unnamed: 0.1,Unnamed: 0,sentence,news_link,outlet,topic,type,group_id,num_sent,Label_bias,Label_opinion,article,biased_words4
0,0,YouTube is making clear there will be no “birt...,https://eu.usatoday.com/story/tech/2020/02/03/...,usa-today,elections-2020,center,1,1,Biased,Somewhat factual but also opinionated,YouTube says no ‘deepfakes’ or ‘birther’ video...,"['belated', 'birtherism']"
1,1,The increasingly bitter dispute between Americ...,https://www.nbcnews.com/news/sports/women-s-te...,msnbc,sport,left,1,1,Non-biased,Entirely factual,"FRISCO, Texas — The increasingly bitter disput...",['bitter']
2,2,So while there may be a humanitarian crisis dr...,https://www.alternet.org/2019/01/here-are-5-of...,alternet,immigration,left,1,1,Biased,Expresses writer’s opinion,Speaking to the country for the first time fro...,['crisis']
3,3,A professor who teaches climate change classes...,https://www.breitbart.com/politics/2019/05/09/...,breitbart,environment,right,1,1,Non-biased,No agreement,A professor who teaches climate change classes...,['legitimate']
4,4,"Looking around the United States, there is nev...",https://thefederalist.com/2020/03/11/woman-who...,federalist,abortion,right,1,1,Biased,Somewhat factual but also opinionated,The left has a thing for taking babies hostage...,"['killing', 'never', 'developing', 'humans', '..."


In [None]:
import re
def clean_text(text):
    """Return `text` lower-cased with punctuation removed.

    Every character that is neither a word character (letters, digits,
    underscore) nor whitespace is deleted; whitespace is left untouched.
    """
    lowered = text.lower()
    return re.sub(r'[^\w\s]', '', lowered)

# Normalise every sentence in place (lower-case, punctuation stripped).
# clean_text calls .lower() on each value, so a non-string cell (e.g. a missing
# value) would raise here — assumes the column is fully populated; TODO confirm.
df['sentence'] = df['sentence'].apply(clean_text)

In [None]:
# Keep only rows whose bias label is something other than 'No agreement'.
has_bias_label = df['Label_bias'] != 'No agreement'
df = df[has_bias_label]

In [None]:
# Class balance after filtering; the output below shows roughly twice as many
# 'Biased' rows as 'Non-biased' ones.
df['Label_bias'].value_counts()


Unnamed: 0_level_0,count
Label_bias,Unnamed: 1_level_1
Biased,1018
Non-biased,533


In [None]:
from sklearn.model_selection import train_test_split

# Features are the cleaned sentences; targets are the string bias labels.
X = df['sentence']
y = df['Label_bias']

# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# split is reproducible across runs.
# NOTE(review): the split is not stratified although the classes are imbalanced.
# NOTE(review): presumably this reproduces the split used when the saved model
# was trained — if it does not, test rows may overlap its training data; verify.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Class balance of the held-out test labels.
y_test.value_counts()

Unnamed: 0_level_0,count
Label_bias,Unnamed: 1_level_1
Biased,208
Non-biased,103


In [None]:
from sklearn.metrics import classification_report, roc_auc_score
import numpy as np

# Step 1: Embed every test sentence, then score all of them in ONE classifier call.
# Sentences go through BERT one at a time on purpose: a lone sequence is never
# padded, so the plain mean over tokens cannot be diluted by padding positions.
y_test_labels = y_test.tolist()

test_embeddings = []
for sentence in tqdm(X_test, desc="Embedding test sentences"):
    inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():  # inference only: skip building the autograd graph
        outputs = model_bert(**inputs)
    # Mean-pool the token vectors into a single (1, features) row.
    test_embeddings.append(outputs.last_hidden_state.mean(dim=1).squeeze().numpy().reshape(1, -1))

# A single batched predict replaces one Keras call (and one progress-bar line)
# per sentence; verbose=0 keeps the cell output limited to the metrics below.
# Column 0 is taken for every row, mirroring the per-sample [0][0] lookup this
# replaces — assumes the classifier emits one score per row; TODO confirm.
y_pred_scores = model.predict(np.vstack(test_embeddings), verbose=0)[:, 0]
y_pred_labels = ["Biased" if score > 0.5 else "Non-biased" for score in y_pred_scores]

# Step 2: Print classification report
# `labels=` pins which class each row of the report refers to. `target_names`
# only renames rows by sorted-label position, so it would mislabel the report
# without any error if the class strings ever sorted differently.
print("📊 Classification Report:")
print(classification_report(y_test_labels, y_pred_labels, labels=["Biased", "Non-biased"]))

# Step 3: Compute G-AUC Score
# Convert labels to binary 1 (Biased) and 0 (Non-biased); the value printed is
# scikit-learn's ROC AUC computed on the raw prediction scores.
y_test_binary = [1 if label == "Biased" else 0 for label in y_test_labels]

auc_score = roc_auc_score(y_test_binary, y_pred_scores)
print(f"G-AUC Score (binary): {auc_score:.4f}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 360ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6