In [None]:
!pip install -q transformers accelerate pandas tqdm huggingface_hub

from huggingface_hub import login
login(token="") # insert own token

In [None]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from tqdm import tqdm

# Enable tqdm's pandas integration (registers the `progress_*` helpers).
tqdm.pandas()

# Hugging Face Hub identifier of the instruction-tuned checkpoint to load.
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"

# Run on the GPU whenever torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

In [None]:
# Half precision is requested only when running on CUDA; the CPU path keeps
# full float32 weights.
model_dtype = torch.float32
if device == "cuda":
    model_dtype = torch.float16

tokenizer = AutoTokenizer.from_pretrained(model_name)

# device_map="auto" leaves the placement of the weights to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=model_dtype,
    device_map="auto",
)

# Text-generation pipeline shared by the classification cells below.
# The EOS token id doubles as the padding id during generation
# (presumably because this tokenizer ships without a pad token - confirm).
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    pad_token_id=tokenizer.eos_token_id,
)

In [None]:
from math import ceil
from tqdm import tqdm

# Labels the classifier may return. classify_batch compares the model's answer
# against these (ignoring underscores) and falls back to "unknown" otherwise.
bias_classes = ["group_2", "group_1", "no_bias"]

# Shared system prompt, sent as the "system" turn of every classification request.
# It is built from adjacent string literals, so the pieces below are joined into
# one string at compile time. Note that any edit to this text changes what the
# model sees and can therefore shift the predictions.
system_prompt = (
    "You are a policy-analysis assistant.\n\n"
    "Your task is to classify each policy excerpt into **exactly one** of the following three categories, based on its main subject and social framing.\n\n"

    "Understanding Bias:\n"
    "- A policy has a bias if it distinguishes, benefits, restricts, or treats people differently based on identity, legal status, or economic position.\n"
    "- This includes both: when a policy provides support, protections, or special resources for a group and when a policy excludes, limits, penalizes, or places extra burdens on a group.\n"
    "- Even if the wording is neutral, if any group is mentioned, implied, or treated differently, the policy has a bias.\n\n"

    "Understanding No Bias:\n"
    "- A policy has no bias when it is entirely procedural, factual, or operational, and does not involve any group-based treatment.\n"
    "- No identity groups, legal status, or economic classifications should be mentioned or implied.\n"
    "- Examples include timelines, implementation steps, staff protocols, or technical logistics.\n\n"

    "Classification Categories:\n"
    "group_2 → Policies involving personal identity or socioeconomic status:\n"
    "   - economic: poverty, income level, wealth, rent support, financial hardship, housing aid.\n"
    "   - racial/cultural: race, ethnicity, cultural identity, minority status, racial disparities.\n"
    "   - age: elderly, children, youth-targeted policies, age-based restrictions or services.\n"
    "   - religion: religious practices, faith-based exceptions, worship protections.\n"
    "   - gender: women's rights, gender identity, gender discrimination, reproductive policy.\n\n"
    "group_1 → Policies focused on legal, civic, or institutional treatment:\n"
    # Typo fix: this entry previously read "international realtions".
    "   - political: elections, campaigning, districting, war related, international relations, veteran related.\n"
    "   - criminal justice: policing, sentencing, incarceration, parole, law enforcement.\n"
    "   - citizenship: immigration status, voting rights, naturalization, deportation, visa policy.\n"
    "   - disability: medical, accommodations, physical/mental impairments, special education services.\n"
    "   - education: student placement, curriculum, testing access, teaching, education access, language proficiency, school enrollment.\n\n"
    "no_bias → Neutral, operational, or internal content with no group implications:\n"
    "   - Meeting schedules, titles, definitions, procedural instructions, timelines, internal processes, technical steps.\n\n"

    "Classification Rules:\n"
    "- If a group is named, implied, or treated differently, assign a bias label (group_1 or group_2) based on the domain.\n"
    "- A policy has a bias if it distinguishes, benefits, restricts, or treats people differently based on identity, legal status, or economic position.\n"
    "- A policy has no bias when it is entirely procedural, factual, or operational, and does not involve any group-based treatment.\n"
    "- Do not default to any label. Treat all three categories as equally likely.\n"
    "- Pick the most specific match based on content.\n"
    "- Return only one label: `no_bias`, `group_1`, or `group_2` — all lowercase, no punctuation, no explanation."
)


def _build_chat_prompt(description):
    """Render one policy excerpt into the full chat-formatted prompt string."""
    user_prompt = (
        "Classify the following policy excerpt.\n"
        "Return only one of: no_bias, group_1, or group_2\n\n"
        f"Excerpt:\n\"\"\"\n{description}\n\"\"\"\n\n"
        "Your answer:"
    )
    # NOTE(review): the template is rendered to text here and tokenized again
    # inside the pipeline; if both steps insert a begin-of-text token the model
    # sees it twice - confirm against the installed transformers version.
    return tokenizer.apply_chat_template([
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ], tokenize=False, add_generation_prompt=True)


def _parse_label(completion, classes):
    """Map the model's raw completion onto one of `classes`, else "unknown".

    Only the first line of the completion is inspected. Every character that is
    not a letter or digit is ignored, so decorated answers such as "`no_bias`",
    '"group_1"', "no bias" or "group_2." still resolve to the intended label.
    The comparison is otherwise exact: extra words yield "unknown".
    """
    first_line = completion.strip().split("\n")[0].lower()
    key = "".join(ch for ch in first_line if ch.isalnum())
    for cls in classes:
        if key == "".join(ch for ch in cls.lower() if ch.isalnum()):
            return cls
    return "unknown"


def classify_batch(df, batch_size=32, column="policy"):
    """Classify every row of `df` with the module-level text-generation pipeline.

    Relies on the notebook globals `tokenizer`, `pipe`, `system_prompt` and
    `bias_classes` being defined before it is called.

    Args:
        df: DataFrame holding the texts to classify in `column`.
        batch_size: Number of rows whose prompts are handed to `pipe` per call.
        column: Name of the column containing the policy excerpts.

    Returns:
        A list with one label per row of `df`, in row order. Each entry is a
        member of `bias_classes`, or "unknown" when the answer cannot be parsed.

    Raises:
        ValueError: If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        # Previously 0 raised ZeroDivisionError and negatives silently returned [].
        raise ValueError("batch_size must be a positive integer")

    all_preds = []
    num_batches = ceil(len(df) / batch_size)

    for i in tqdm(range(num_batches), desc="Classifying in batches"):
        batch_df = df.iloc[i * batch_size : (i + 1) * batch_size]
        prompts = [_build_chat_prompt(description) for description in batch_df[column]]

        # Greedy decoding (do_sample=False) keeps the answers deterministic;
        # 20 new tokens is ample for a single label.
        # NOTE(review): no `batch_size` is forwarded to the pipeline, so whether
        # these prompts are actually batched on the device depends on the
        # pipeline's default - confirm before relying on the speed-up.
        outputs = pipe(prompts, max_new_tokens=20, do_sample=False)

        for output, prompt in zip(outputs, prompts):
            generated = output[0]['generated_text']
            # The pipeline output starts with the prompt itself; drop that prefix
            # so that only the newly generated answer is parsed.
            all_preds.append(_parse_label(generated[len(prompt):], bias_classes))

    return all_preds

In [None]:
import pandas as pd
from math import ceil
from tqdm import tqdm
import os
from google.colab import drive

# Make Google Drive reachable from the Colab runtime.
drive.mount('/content/drive')

# Folder on Drive that receives the checkpoints; reused if it already exists.
output_dir = "/content/drive/MyDrive/policy_bias_checkpoints"
os.makedirs(output_dir, exist_ok=True)

# Read the complete dataset and reserve a column for the running predictions.
df = pd.read_csv("FINAL_DATASET.csv")
df['predicted_bias'] = None

batch_size = 32
num_batches = ceil(len(df) / batch_size)

# NOTE(review): each iteration writes the *whole* table to a new, differently
# named file, so Drive usage grows with the number of batches, and nothing here
# reads a checkpoint back after an interrupted run - confirm this is intended.
for batch_i in tqdm(range(num_batches), desc="Classifying and checkpointing"):
    # Positional bounds of this batch; the last one is clipped to the table length.
    start = batch_size * batch_i
    end = min(len(df), start + batch_size)

    # Index labels of the rows in this batch, and the rows themselves.
    batch_idx = df.index[start:end]
    batch_df = df.loc[batch_idx]

    # Classify only this slice and store the labels in the master table.
    preds = classify_batch(batch_df, batch_size=batch_size)
    df.loc[batch_idx, 'predicted_bias'] = preds

    # Persist the full table (rows beyond `end` are still unpredicted).
    ckpt_name = f"checkpoint_up_to_{end}.csv"
    ckpt_path = os.path.join(output_dir, ckpt_name)
    df.to_csv(ckpt_path, index=False)
    print(f"  → saved full table with predictions through row {end} to {ckpt_name}")



In [None]:
from sklearn.metrics import classification_report, accuracy_score

# Labels that count as a usable prediction / ground truth.
valid_labels = ["group_2", "group_1", "no_bias"]

# Normalise casing and surrounding whitespace on both columns.
# astype(str) turns missing values into the strings "nan"/"none", which are not
# in valid_labels and are therefore filtered out below.
df['bias_type_merged'] = df['bias_type_merged'].astype(str).str.strip().str.lower()
df['predicted_bias'] = df['predicted_bias'].astype(str).str.strip().str.lower()

has_valid_truth = df['bias_type_merged'].isin(valid_labels)
has_valid_pred = df['predicted_bias'].isin(valid_labels)

# Rows usable for the per-class metrics: both labels must be valid.
df_clean = df[has_valid_pred & has_valid_truth].copy()
df_clean['correct'] = df_clean['bias_type_merged'] == df_clean['predicted_bias']

# Report how many labelled rows were left out because the prediction could not
# be parsed (e.g. "unknown"). Without this, the accuracy below silently ignores
# every row the model failed on in that way and can look better than it is.
n_labeled = int(has_valid_truth.sum())
n_excluded = n_labeled - len(df_clean)
print(
    f"Scored {len(df_clean)} of {n_labeled} rows with a valid ground-truth label "
    f"({n_excluded} excluded because the prediction is not one of {valid_labels})."
)

if df_clean.empty:
    # Nothing to score: avoid printing a NaN accuracy and calling the report
    # on empty inputs.
    accuracy = float("nan")
    print("\nNo rows have both a valid prediction and a valid ground-truth label.")
else:
    # Accuracy over rows with a parseable prediction (the original metric).
    accuracy = df_clean['correct'].mean()
    print(f"\nAccuracy: {accuracy * 100:.2f}%")

    # Accuracy over all labelled rows, counting unparseable predictions as errors.
    strict_accuracy = df_clean['correct'].sum() / n_labeled
    print(f"Accuracy counting excluded rows as errors: {strict_accuracy * 100:.2f}%")

    # Precision, Recall, F1
    print("\nClassification Report:")
    print(classification_report(
        df_clean['bias_type_merged'],
        df_clean['predicted_bias'],
        labels=valid_labels,
        digits=3
    ))



In [None]:
# Sanity check on the prediction column: first the distinct values that occur,
# then how often each one appears.
predicted_column = df["predicted_bias"]
print(predicted_column.unique())
print(predicted_column.value_counts())