In [None]:
# Colab-only step: upload input files into the runtime. `uploaded` keeps whatever
# files.upload() returns; it is not referenced again in the visible cells.
# NOTE(review): presumably this is how FINAL_DATASET.csv (read in a later cell) gets
# onto the runtime — confirm.
from google.colab import files
uploaded = files.upload()

In [None]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Hugging Face checkpoint evaluated by this notebook.
model_name = "deepseek-ai/deepseek-llm-7b-chat"
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Half precision is only requested when a GPU is available. On a CPU-only
# runtime float16 is the usual source of load/inference errors, so fall back
# to float32 automatically instead of asking the user to edit this cell.
model_dtype = torch.float16 if device == "cuda" else torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=model_dtype,
    device_map="auto",           # let the library decide where each layer is placed
    offload_folder="./offload"   # directory used for weights that are offloaded to disk
)

In [None]:
from tqdm.notebook import tqdm

# Text-generation pipeline wrapping the model and tokenizer loaded in the previous cell.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Evaluation data; the column names are printed as a quick sanity check.
df = pd.read_csv(filepath_or_buffer="FINAL_DATASET.csv")
print(df.columns.tolist())

**NINE SHOT NORMAL**

In [None]:
# Nine-shot prompt, "normal" variant: setup for the classification run in this cell.
import re
# Enables Series.progress_apply, which this cell uses when classifying the dataframe.
tqdm.pandas()

from transformers import pipeline

# NOTE(review): this rebuilds a text-generation pipeline from the same `model` and
# `tokenizer` that an earlier cell already wrapped in `pipe`; the rebinding looks
# redundant when cells are run top to bottom — confirm before removing.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
# System message shared by every request: label definitions, nine worked examples
# and the decision rules. It contains no per-row data, so it is built once here
# instead of being re-assembled on every call.
_SYSTEM_PROMPT = (
    "You are a policy-analysis assistant.\n"
    "Given a policy excerpt, output *exactly one* of these three labels—nothing else, all lower-case:\n"
    "- no_bias\n"
    "- group_1 (political, criminal_justice, citizenship, disability, education)\n"
    "- group_2 \n\n"
    "Definitions:\n"
    "- *group_2*: policies related to identity, social, and economic status, including:\n"
    "• economic: references income, poverty, homelessness, wealth, financial assistance, costs, payments, fees, funding, housing, area living in, etc.\n"
    "• racial_cultural: references race, ethnicity, culture, personal beliefs, etc.\n"
    "• age: references children, youth, child welfare policies, adults, elderly, seniors, age-related policies, etc.\n"
    "• religion: references religious beliefs, religious groups, faith-based accommodations, etc.\n"
    "• gender: references women, men, sex, gender identity, use of only one pronoun for a role, sexual harassment, reproductive rights, pregnancy, etc.\n\n"
    "- *group_1*: policies related to legal, institutional, or civic systems, including:\n"
    # Fixed: the original text ended in "etc.1" (stray digit).
    "• political: references voting rights, politics, elections, campaigns, government, war, international relations etc.\n"
    "• criminal_justice: references crime, criminals, court, law enforcement, policing, prison, etc.\n"
    "• citizenship: references immigration, immigration status, deportation, visas, border control, etc.\n"
    "• disability: references physical or mental impairments, accommodations for impairments, accessibility, illness, etc.\n"
    "• education: curriculum, degrees, teaching credentials, language proficiency, language required to learn, standardized testing, school admission, etc.\n"
    "- *no_bias*: procedural, definitional, administrative, factual, or operational text, like implementation details or definitions.\n\n"
    "Examples:\n\n"
    "Example 1:\n"
    "Policy: The job application process mandates that candidates disclose their age and retirement plans, with younger applicants prioritized for advancement.\n"
    "Classification: group_2\n\n"
    "Example 2:\n"
    "Policy: Tax exemptions will be granted to corporations investing over $5 million in designated opportunity zones.\n"
    "Classification: group_2\n\n"
    "Example 3:\n"
    "Policy: The bill excludes same-sex partners from family leave benefits.\n"
    "Classification: group_2\n\n"
    "Example 4:\n"
    "Policy: Immigrants must demonstrate fluency in English and pass a civic exam to be eligible for naturalization.\n"
    "Classification: group_1\n\n"
    "Example 5:\n"
    "Policy: The policy mandates separate healthcare facilities for individuals with disabilities, citing operational constraints.\n"
    "Classification: group_1\n\n"
    "Example 6:\n"
    "Policy: A bill mandates non-citizens charged with a Class 4 felony shall be designated deportable immediately upon indictment.\n"
    "Classification: group_1\n\n"
    "Example 7:\n"
    "Policy: The committee will meet quarterly to review environmental compliance reports.\n"
    "Classification: no_bias\n\n"
    "Example 8:\n"
    "Policy: Applicants must submit identity verification and a signed consent form for record access.\n"
    "Classification: no_bias\n\n"
    "Example 9:\n"
    "Policy: The funding request was approved following standard procurement guidelines and budgetary review.\n"
    # Fixed: blank line added so "Decision Rules:" is separated like every other section.
    "Classification: no_bias\n\n"
    "Decision Rules:\n"
    "- Focus on legal or civic status -> group_1.\n"
    "- Focus on economic standing or demographic identity -> group_2.\n"
    "- Purely procedural/factual -> no_bias.\n\n"
)

# Matches any of the three labels as a whole token inside lower-cased model output.
_LABEL_PATTERN = re.compile(r"\b(group_1|group_2|no_bias)\b")


def _build_messages(policy):
    """Return the chat messages (system + user) for one policy excerpt."""
    user_prompt = (
        "Classify the following excerpt:\n"
        "\"\"\"\n"
        f"{policy}\n"
        # Fixed: the closing fence was missing its newline, so the next sentence
        # was glued directly onto the quotes.
        "\"\"\"\n"
        "When uncertain, pick the best matching category. No explanations.\n"
        # Fixed: dropped the dangling comma after the last label.
        "Return *only* one of: group_1, group_2, no_bias\n\n"
    )
    return [
        {"role": "system", "content": _SYSTEM_PROMPT},
        {"role": "user", "content": user_prompt},
    ]


def _extract_label(reply):
    """Pull a label out of the first line of the reply, else return that line unchanged."""
    first_line = reply.strip().split("\n")[0]
    found = _LABEL_PATTERN.search(first_line.lower())
    return found.group(1) if found else first_line


def classify_bias(policy) -> str:
    """Classify one policy excerpt with the nine-shot prompt.

    Args:
        policy: Text of the policy excerpt; it is interpolated into the user
            message with ordinary string formatting.

    Returns:
        ``"group_1"``, ``"group_2"`` or ``"no_bias"`` when one of these labels
        appears in the first line of the model's reply (case-insensitive, so
        replies such as ``"Group_1."`` or ``"Classification: group_1"`` are
        accepted). Otherwise the stripped first line of the reply is returned
        unchanged so unexpected outputs remain visible to the caller.
    """
    # Greedy decoding with a small token budget: only a short label is expected.
    result = pipe(_build_messages(policy), max_new_tokens=10, do_sample=False)
    # For chat-style input, 'generated_text' is the message list and the last
    # entry is the model's reply.
    reply = result[0]['generated_text'][-1]["content"]
    return _extract_label(reply)

#################################################################################################
# Define valid classes
# This list is what the classification report later in the cell receives as `labels=`.
valid_labels = ["group_2", "group_1", "no_bias"]


# Apply classifier
# Calls classify_bias on every value of the `policy` column (with a tqdm progress bar)
# and stores the returned strings in a new `predicted_bias` column of df.
df['predicted_bias'] = df['policy'].progress_apply(classify_bias) #POLICY

################################################################################################

from sklearn.metrics import classification_report, accuracy_score

def _normalized(labels):
    """Return the labels as stripped, lower-cased strings."""
    return labels.astype(str).str.strip().str.lower()


# Bring predictions and gold labels into the same textual form before comparing.
df['predicted_bias'] = _normalized(df['predicted_bias'])
df['bias_type_merged'] = _normalized(df['bias_type_merged'])

# Row-level hit/miss flag (kept as a column) and the overall accuracy.
df['correct'] = df['bias_type_merged'].eq(df['predicted_bias'])
accuracy = df['correct'].mean()
print(f"\nAccuracy: {accuracy * 100:.2f}%")

# Per-class precision, recall and F1, restricted to the known label set.
print("\nClassification Report:")
print(
    classification_report(
        y_true=df['bias_type_merged'],
        y_pred=df['predicted_bias'],
        labels=valid_labels,
        digits=3,
    )
)

# Write the dataframe (including the prediction and correctness columns) to disk.
output_csv = "Deepseek7B_Nine_Shot_Normal.csv"
df.to_csv(output_csv, index=False)

# Hand the file to the browser for download; google.colab is a Colab-runtime module.
from google.colab import files
files.download(output_csv)