In [None]:
import os

import pandas as pd
import torch
from huggingface_hub import login
from sklearn.metrics import classification_report
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

tqdm.pandas()

# Log in to huggingface to use Mistral model.
# The access token is read from the HF_TOKEN environment variable so that it is
# never pasted into (and saved or committed with) the notebook itself. When the
# variable is unset or empty, None is passed and login() asks for the token
# interactively instead.
login(token=os.environ.get("HF_TOKEN") or None)

# Bias classes of dataset; classify_bias() returns one of these labels (or "Unknown")
bias_classes = [
    "no_bias", "group_1", "group_2"
]

# Load dataset with perturbed policy texts
df = pd.read_csv("FINAL_PERTURBED_DATASET.csv")

In [None]:
# Set up Mistral model and tokenizer
model_name = "mistralai/Mistral-7B-Instruct-v0.3"
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Half precision is only requested when a GPU is available. On a CPU-only
# machine the weights are loaded in float32, since float16 inference on CPU is
# slow and, depending on the PyTorch build, unsupported for some operations.
model_dtype = torch.float16 if device == "cuda" else torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token as the padding token
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=model_dtype,
    device_map="auto"
)

# Text-generation pipeline used by classify_bias()
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

In [None]:
# Use Mistral to predict the bias of a policy
def classify_bias(description):
    """Classify one policy excerpt with the few-shot Mistral prompt.

    A system message (label definitions, nine worked examples and step-by-step
    instructions) and a user message containing ``description`` are sent to the
    module-level ``pipe``. The first line of the reply is then matched against
    the module-level ``bias_classes``.

    Args:
        description: The policy excerpt; it is interpolated into the user message.

    Returns:
        The first entry of ``bias_classes`` (in list order) that occurs,
        case-insensitively, in the first line of the reply, or ``"Unknown"``
        when none of them does.
    """
    # Few-shot system prompt: label list, definitions, examples, instructions
    system_message = (
        "You are a policy-analysis assistant.\n"
        "Given a policy excerpt, output *exactly one* of these three labels—nothing else, all lower-case:\n"
        "- group_2:\n"
        "- group_1:\n"
        "- no_bias\n\n"
        "Definitions:\n"
        "- group_2: policies related to identity, social, and economic status, including:\n"
        "• economic: references income, poverty, homelessness, wealth, financial assistance, costs, payments, fees, funding, housing, area living in, etc.\n"
        "• racial_cultural: references race, ethnicity, culture, personal beliefs, etc.\n"
        "• age: references children, youth, child welfare policies, adults, elderly, seniors, age-related policies, etc.\n"
        "• religion: references religious beliefs, regligious groups, faith-based accommodations, etc.\n"
        "• gender: references women, men sex, gender identity, use of only one pronoun for a role, sexual harassment, reproductive rights, pregnancy, etc.\n"
        "- group_1: policies related to legal, institutional, or civic systems, including:\n"
        "• political: references voting rights, politics, elections, campaigns, government, war, international relations, etc.\n"
        "• criminal_justice: refernces crime, criminals, court, law enforcement, policing, prison, etc.\n"
        "• citizenship: references immigration, immigration status, deportation, visas, border control, etc.\n"
        "• disability: references physical or mental impairments, accommodations for impairments, accessibility, illness, etc.\n"
        "• education: curriculum, degrees, teaching credentials, language proficiency, language required to learn, standardized testing, school admission, etc.\n"
        "- no_bias: procedural, definitional, administrative, factual, or operational text, like implementation details or definitions.\n\n"
        "Examples:\n"
        "Example 1\n"
        "Policy: The job application process mandates that candidates disclose their age and retirement plans, with younger applicants prioritized for advancement.\n"
        "Classification: group_2\n\n"
        "Example 2\n"
        "Policy: Tax exemptions will be granted to corporations investing over $5 million in designated opportunity zones.\n"
        "Classification: group_2\n\n"
        "Example 3\n"
        "Policy: The bill excludes same‑sex partners from family leave benefits.\n"
        "Classification: group_2\n\n"
        "Example 4\n"
        "Policy: Immigrants must demonstrate fluency in English and pass a civic exam to be eligible for naturalization.\n"
        "Classification: group_1\n\n"
        "Example 5\n"
        "Policy: The policy mandates separate healthcare facilities for individuals with disabilities, citing operational constraints.\n"
        "Classification: group_1\n\n"
        "Example 6\n"
        "Policy: A bill mandates non‑citizens charged with a Class 4 felony shall be designated deportable immediately upon indictment.\n"
        "Classification: group_1\n\n"
        "Example 7\n"
        "Policy: The committee will meet quarterly to review environmental compliance reports.\n"
        "Classification: no_bias\n\n"
        "Example 8\n"
        "Policy: Applicants must submit identity verification and a signed consent form for record access.\n"
        "Classification: no_bias\n\n"
        "Example 9\n"
        "Policy: The funding request was approved following standard procurement guidelines and budgetary review.\n"
        "Classification: no_bias\n\n"
        "The order of the eaxmples do not matter. They have no significance in how you should classify excerpts.\n\n"
        "Instructions:\n"
        "Follow the following steps exactly, when picking a label:\n"
        "Step 1: Does the excerpt fit under economic, racial_cultural, age, religion, or gender? If yes, pick group_2.\n"
        "Step 2: If the answer to Step 1 was \"no\", does the excerpt fit under political, criminal_justice, citizenship, disability, or education? If yes, pick group_1.\n"
        "Step 3: If the excerpt does not fit under group_1 or group_2, it is purely a procedure, a definition, or administrative, so pick no_bias.\n"
        "- Do not default to any label. All labels are equally possible for an excerpt.\n"
        "- Return just the label; no explanations."
    )

    # The excerpt is wrapped in triple quotes to delimit it from the request
    user_message = (
        "Classify the following policy excerpt.\n"
        "Return *only* one of: no_bias, group_1, group_2\n\n"
        "Excerpt:\n"
        "\"\"\"\n"
        f"{description}\n"
        "\"\"\""
    )

    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message},
    ]

    # Sampling disabled and at most 10 new tokens: only a short label is wanted
    outputs = pipe(conversation, max_new_tokens=10, do_sample=False)

    # The last message of the returned conversation is taken as the model's reply;
    # only its first line is inspected
    reply = outputs[0]["generated_text"][-1]["content"]
    first_line = reply.strip().partition("\n")[0].lower()

    # First known label (in bias_classes order) contained in that line wins
    return next(
        (label for label in bias_classes if label.lower() in first_line),
        "Unknown",
    )

In [None]:
# Use Mistral to predict bias of every policy
for row_idx in tqdm(range(len(df))):
    policy_text = df.loc[row_idx, "policy_perturbed"]
    df.loc[row_idx, "predicted_bias_perturbed"] = classify_bias(policy_text)
    # The full table is rewritten after every row, so the predictions made so
    # far are already on disk if the run stops part-way
    df.to_csv("mistral_few_shot_perturbed.csv", index = False)

# Flag each row whose prediction equals the ground-truth label, comparing both
# sides after trimming whitespace and lower-casing
expected_labels = df['bias_type_merged'].str.strip().str.lower()
predicted_labels = df['predicted_bias_perturbed'].str.strip().str.lower()
df['correct_perturbed'] = expected_labels == predicted_labels

# Save again so the file also contains the correctness column
df.to_csv("mistral_few_shot_perturbed.csv", index = False)

In [None]:
# Calculate evaluation results for Mistral bias predictions
df = pd.read_csv("mistral_few_shot_perturbed.csv")
accuracy = df['correct_perturbed'].mean()
print(f"Accuracy: {accuracy * 100:.2f}%")

# Normalise both columns exactly as was done when `correct_perturbed` was
# computed (strip whitespace, lower-case). Otherwise labels that differ only in
# case or spacing are counted as separate classes and the per-class report
# disagrees with the accuracy printed above.
labels = df["bias_type_merged"].str.strip().str.lower().tolist()
predictions = df["predicted_bias_perturbed"].str.strip().str.lower().tolist()

# zero_division=0: a class with no true or no predicted samples (for example the
# "unknown" fallback prediction) is reported as 0 instead of raising a warning
print(classification_report(labels, predictions, zero_division=0))