<a href="https://colab.research.google.com/github/temahm/AiCon/blob/main/JobDescriptionBiasAssestmentV2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Installs and imports

In [None]:
!pip -q install -U google-genai
import pandas as pd
import re, json
from google.colab import userdata
from google import genai

Bias keyword dictionary

In [None]:
# Seed vocabulary for the keyword scan: bias category -> trigger words/phrases.
# Matches are only candidates; each one is reviewed by the LLM later on.
bias_keywords = {
    'Gender Bias': [
        'he', 'she', 'gentleman', 'lady', 'his', 'her', 'manpower',
        'salesman', 'foreman', 'waitress', 'actress', 'mankind',
        'rockstar', 'ninja', 'dominant',
    ],
    'Age Bias': [
        'young', 'energetic', 'recent graduate', 'digital native',
        'seasoned', 'mature', 'senior', 'youthful', 'fresh talent',
    ],
    'Racial/Ethnic Bias': [
        'native speaker', 'cultural fit',
    ],
    'Disability/ADA Bias': [
        'able-bodied', 'stand', 'walk', 'lift heavy objects',
        'physically demanding', 'independent mobility', 'hearing', 'sight',
    ],
    'Socioeconomic Bias': [
        'prestigious university', 'elite background', 'unpaid internship',
        'car ownership',
    ],
    'Exclusionary Language': [
        'must have', 'only', 'everyone knows', 'we expect', 'exclusive',
    ],
    'Regulatory Risk': [
        'under 30', 'healthy', 'married', 'single', 'family status',
        'nationality', 'religion', 'religions', 'criminal record',
        'background checks',
    ],
}

print("Bias keywords dictionary initialized.")

Paste job description input

In [None]:
print("Paste the job description. Press Enter on an empty line TWICE to finish.\n")

# Collect non-blank lines until two blank lines arrive back to back.
# A single blank line is skipped (not stored) and only counts towards the
# terminator; any non-blank line resets the counter.
lines = []
empty_count = 0

while empty_count < 2:
    line = input()
    if line.strip():
        empty_count = 0
        lines.append(line)
    else:
        empty_count += 1

job_description_text = "\n".join(lines).strip()

print("\nJob description received.")
print("Characters:", len(job_description_text))
print("Preview:\n", job_description_text[:500], "...\n")

Gemini client setup + model auto-pick

In [None]:
# Read the Gemini API key from the Colab secrets store (google.colab.userdata).
API_KEY = userdata.get("GEMINI_API_KEY")
# Stop early with setup instructions if the lookup returned an empty value.
if not API_KEY:
    raise RuntimeError("Add your Gemini API key in Colab Secrets as GEMINI_API_KEY")

# Shared client used by the model picker and by every later LLM call.
client = genai.Client(api_key=API_KEY)

def pick_generate_content_model(client):
    """Choose a model name that can serve generateContent requests.

    Every model returned by ``client.models.list()`` is inspected. A model
    qualifies when it has a truthy ``name`` and its ``supported_actions``
    (or, failing that, ``supported_methods``) mentions ``generateContent``
    or ``generate_content``.

    Returns the first entry of the preference list that qualifies, otherwise
    the first qualifying model in listing order, otherwise ``None``.
    """
    # Flash models come first for speed/cost; a pro model is the last preference.
    preference_order = (
        "models/gemini-2.5-flash",
        "models/gemini-2.0-flash",
        "models/gemini-1.5-flash",
        "models/gemini-1.5-pro",
    )

    def _capabilities_text(model):
        # The capability attribute may be a list/tuple or something else;
        # flatten whatever is there into one searchable string.
        caps = getattr(model, "supported_actions", None) or getattr(model, "supported_methods", None)
        if isinstance(caps, (list, tuple)):
            return " ".join(caps)
        return str(caps)

    usable = []
    for model in client.models.list():
        model_name = getattr(model, "name", None)
        caps_text = _capabilities_text(model)
        can_generate = "generateContent" in caps_text or "generate_content" in caps_text
        if model_name and can_generate:
            usable.append(model_name)

    if not usable:
        return None

    # First preferred name that is actually usable, else the first usable model.
    for wanted in preference_order:
        if wanted in usable:
            return wanted
    return usable[0]

# Resolve the model once; llm() reuses this name for every request.
MODEL_NAME = pick_generate_content_model(client)
# The picker returns None when no listed model qualifies for generateContent.
if not MODEL_NAME:
    raise RuntimeError("No models available for generateContent with this API key.")

print("Using model:", MODEL_NAME)

def llm(prompt: str) -> str:
    """Send ``prompt`` to the selected Gemini model and return its text reply.

    Uses the module-level ``client`` and ``MODEL_NAME``. The reply is stripped
    of surrounding whitespace; an empty or missing text yields ``""``.
    """
    response = client.models.generate_content(model=MODEL_NAME, contents=prompt)
    reply = response.text
    if not reply:
        return ""
    return reply.strip()

Robust keyword matching

In [None]:
def make_keyword_pattern(keyword: str) -> re.Pattern:
    """Compile a case-insensitive whole-word/phrase regex for ``keyword``.

    The keyword is split on whitespace and each word is regex-escaped. Words
    of a multi-word phrase may be separated by any run of whitespace in the
    text. Lookarounds on both sides reject matches that touch a word
    character, so "he" does not match inside "the".
    """
    escaped_words = [re.escape(word) for word in keyword.strip().split()]
    # Joining with \s+ also covers the single-word case (nothing to join).
    phrase = r'\s+'.join(escaped_words)
    return re.compile(r'(?<!\w)' + phrase + r'(?!\w)', flags=re.IGNORECASE)

def get_context(text: str, start: int, end: int, window: int = 140) -> str:
    """Return the text around ``text[start:end]``, trimmed of outer whitespace.

    The slice reaches ``window`` characters before ``start`` and after ``end``
    and is clamped to the bounds of ``text``.
    """
    left = start - window
    if left < 0:
        left = 0
    right = end + window
    if right > len(text):
        right = len(text)
    snippet = text[left:right]
    return snippet.strip()

# One record per regex match of every keyword in every category, in
# dictionary order. Overlapping or repeated phrases are all kept here.
raw_hits = [
    {
        "source": "keyword_scan",
        "category": category,
        "keyword": keyword,
        "phrase": m.group(0),
        "start": m.start(),
        "end": m.end(),
        "context": get_context(job_description_text, m.start(), m.end()),
    }
    for category, keywords in bias_keywords.items()
    for keyword in keywords
    for m in make_keyword_pattern(keyword).finditer(job_description_text)
]

print("Raw keyword hits found:", len(raw_hits))
# Show at most the first ten hits as a quick sanity check.
for hit in raw_hits[:10]:
    print(f"- [{hit['category']}] '{hit['phrase']}' (keyword='{hit['keyword']}')")

LLM “open discovery” (find issues even without keywords)

In [None]:
# Prompt for the keyword-independent audit pass. The doubled braces {{ }} are
# literal braces in the f-string (they spell out the JSON template); only
# {job_description_text} is interpolated, wrapped in escaped triple quotes.
discovery_prompt = f"""
You are auditing a job description for potentially biased, exclusionary, or legally risky language.

Task:
1) Identify any phrases that could discourage protected groups or raise compliance concerns (gender, age, race/ethnicity, disability/ADA, family status, nationality, religion, etc.).
2) Also identify overly exclusionary requirements (e.g., "only", "must have X years", "native speaker", etc.) when phrased in a way that could be unfair.
3) Return results as JSON ONLY in the exact format below.

Return JSON format:
{{
  "findings": [
    {{
      "phrase": "...exact phrase from the text...",
      "category": "Gender Bias | Age Bias | Racial/Ethnic Bias | Disability/ADA Bias | Socioeconomic Bias | Exclusionary Language | Regulatory Risk | Other",
      "severity": 1,
      "reason": "...short reason...",
      "suggested_rewrite": "...neutral alternative..."
    }}
  ]
}}

Job description:
\"\"\"{job_description_text}\"\"\"
"""

# Keep the raw reply; it is parsed into JSON in a later cell.
discovery_text = llm(discovery_prompt)
# Preview only the first 1200 characters of the reply.
print(discovery_text[:1200], "...\n")

Parse discovery JSON safely + merge with keyword hits

In [None]:
import json

def extract_json(text: str):
    """Return the first JSON object embedded in ``text``, or ``None``.

    Model replies often wrap the JSON in prose or Markdown fences. Parsing
    starts at the first ``{`` and stops at the end of the first complete JSON
    value, so anything after the object (including text that contains ``}``)
    is ignored rather than causing a parse failure.

    Args:
        text: Raw reply text; may be empty.

    Returns:
        The decoded ``dict``, or ``None`` when ``text`` is empty, contains no
        ``{``, or the data starting at the first ``{`` is not valid JSON.
    """
    if not text:
        return None

    start = text.find("{")
    if start == -1:
        return None

    try:
        # raw_decode returns (value, end_index) and tolerates trailing data,
        # unlike json.loads, which rejects anything after the document.
        parsed, _end = json.JSONDecoder().raw_decode(text[start:])
    except (ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError; RecursionError guards
        # against pathologically nested input. Best-effort: report "no JSON".
        return None
    return parsed

discovery = extract_json(discovery_text)

# The reply is model-generated, so do not trust its shape: "findings" may be
# missing, null, or not a list, and individual entries may not be objects.
llm_findings = discovery.get("findings") if isinstance(discovery, dict) else None
if not isinstance(llm_findings, list):
    llm_findings = []
llm_findings = [entry for entry in llm_findings if isinstance(entry, dict)]

print("LLM discovery findings:", len(llm_findings))

merged = []

# Add keyword hits as "candidates" (will validate with LLM next)
for h in raw_hits:
    merged.append({
        "source": h["source"],
        "category_hint": h["category"],
        "phrase": h["phrase"],
        "context": h["context"],
        "severity": None,
        "reason": None,
        "suggested_rewrite": None,
        "llm_validated": None
    })

# Add discovery findings directly (still validate later for consistency)
for f in llm_findings:
    merged.append({
        "source": "llm_discovery",
        # `or` (rather than a .get default) also covers an explicit JSON null.
        "category_hint": f.get("category") or "Other",
        # A null or non-string phrase must not crash the merge; it becomes ""
        # or its string form, and empties are dropped below.
        "phrase": str(f.get("phrase") or "").strip(),
        "context": None,
        "severity": f.get("severity", None),
        "reason": f.get("reason", None),
        "suggested_rewrite": f.get("suggested_rewrite", None),
        "llm_validated": None
    })

# Remove empties and de-duplicate by phrase (case-insensitive). The first
# occurrence wins, so a keyword hit takes precedence over a discovery finding
# with the same phrase.
cleaned = []
seen = set()
for item in merged:
    phr = (item["phrase"] or "").strip()
    if not phr:
        continue
    key = phr.lower()
    if key in seen:
        continue
    seen.add(key)
    cleaned.append(item)

print("Merged unique candidates:", len(cleaned))

Validate each candidate with the LLM (is_issue true/false + final fields)

In [None]:
def validate_one(phrase, category_hint, context_hint):
    """Ask the LLM whether ``phrase`` is a genuine issue in its context.

    When ``context_hint`` is empty or ``None`` the whole job description is
    sent as context instead.

    Returns:
        A ``(parsed, raw_reply)`` tuple: ``parsed`` is the result of
        ``extract_json`` on the reply (``None`` if nothing could be parsed)
        and ``raw_reply`` is the reply text as returned by ``llm``.
    """
    surrounding_text = context_hint if context_hint else job_description_text

    validation_prompt = f"""
You are validating whether a flagged phrase is truly problematic in context.

Return JSON ONLY:
{{
  "is_issue": true,
  "category": "Gender Bias | Age Bias | Racial/Ethnic Bias | Disability/ADA Bias | Socioeconomic Bias | Exclusionary Language | Regulatory Risk | Other",
  "severity": 1,
  "reason": "short reason",
  "suggested_rewrite": "neutral rewrite"
}}

Phrase: "{phrase}"
Category hint: "{category_hint}"

Context:
\"\"\"{surrounding_text}\"\"\"
"""
    raw_reply = llm(validation_prompt)
    return extract_json(raw_reply), raw_reply

# Run every candidate through the validator. Candidates the model confirms
# are kept and enriched with its verdict; candidates whose reply could not be
# parsed are also kept, flagged llm_validated=False; the rest are dropped.
validated = []
for candidate in cleaned:
    verdict, _raw_reply = validate_one(
        candidate["phrase"], candidate["category_hint"], candidate["context"]
    )

    if not verdict:
        # If parsing fails, mark unvalidated
        candidate["llm_validated"] = False
        validated.append(candidate)
        continue

    if verdict.get("is_issue") is not True:
        continue

    # Fields the model omitted fall back to the candidate's existing values.
    candidate.update(
        llm_validated=True,
        category=verdict.get("category", candidate["category_hint"]),
        severity=verdict.get("severity", candidate["severity"]),
        reason=verdict.get("reason", candidate["reason"]),
        suggested_rewrite=verdict.get("suggested_rewrite", candidate["suggested_rewrite"]),
    )
    validated.append(candidate)

print("Validated issues kept:", len(validated))

Bias Risk Score (0–100) + rubric
This score is explainable and reproducible: the same set of validated issues always yields the same score.
Rubric (simple, transparent):
Each validated issue adds points based on severity:
severity 1 → +4
severity 2 → +8
severity 3 → +12
severity 4 → +16
severity 5 → +20
Category multipliers:
Regulatory Risk, Disability/ADA → ×1.25
Racial/Ethnic Bias, Age Bias, Gender Bias → ×1.15
Exclusionary Language, Socioeconomic Bias → ×1.0
Other → ×0.8
Cap at 100.

In [None]:
def severity_points(sev):
    """Convert a severity rating into rubric points (4 points per level).

    Args:
        sev: Severity as reported by the model; expected to be 1-5 but may be
            a string, float, ``None`` or out of range.

    Returns:
        4, 8, 12, 16 or 20. Values that cannot be converted to an integer
        count as severity 1. Out-of-range values are clamped to 1-5, so a
        rating above 5 scores as the maximum instead of the minimum.
    """
    try:
        level = int(sev)
    except (TypeError, ValueError, OverflowError):
        # None, non-numeric strings, NaN and infinities: use the lowest level
        # rather than failing the whole score computation.
        level = 1
    level = max(1, min(5, level))
    return 4 * level

# Category weights from the rubric, grouped by tier. Categories missing from
# this table are weighted 1.0 by the lookup in the loop below.
category_multiplier = {
    **dict.fromkeys(("Regulatory Risk", "Disability/ADA Bias"), 1.25),
    **dict.fromkeys(("Racial/Ethnic Bias", "Age Bias", "Gender Bias"), 1.15),
    **dict.fromkeys(("Exclusionary Language", "Socioeconomic Bias"), 1.0),
    "Other": 0.8,
}

# Sum severity points times category weight over all kept issues.
score_raw = 0.0
for issue in validated:
    if "category" in issue:
        label = issue["category"]
    else:
        # Items without a validated category fall back to their hint.
        label = issue.get("category_hint", "Other")
    points = severity_points(issue.get("severity", 1))
    score_raw += points * category_multiplier.get(label, 1.0)

# Round to one decimal place and cap at 100.
bias_score = round(score_raw, 1)
if not bias_score < 100:
    bias_score = 100

def score_band(s):
    """Map a bias score to its risk label.

    Up to 10 is "Low", up to 30 "Moderate", up to 60 "High", and anything
    above 60 is "Very High" (upper bounds are inclusive).
    """
    bands = ((10, "Low"), (30, "Moderate"), (60, "High"))
    for upper_bound, label in bands:
        if s <= upper_bound:
            return label
    return "Very High"

# Headline numbers: the capped score, its risk band, and the issue count.
for label, value in (
    ("Bias Risk Score (0–100):", bias_score),
    ("Risk Level:", score_band(bias_score)),
    ("Validated issue count:", len(validated)),
):
    print(label, value)

Final report table + grouped summary

In [None]:
df = pd.DataFrame(validated)

# order columns nicely
cols = ["category", "severity", "phrase", "reason", "suggested_rewrite", "source"]

# An empty `validated` list yields a frame with no columns at all, so every
# column access below is guarded; otherwise the "no issues" case would raise
# KeyError instead of reaching its message.
if len(df) > 0:
    # normalize category field: items kept after a failed validation parse
    # never receive a "category" key, so fall back to their category hint.
    if "category" not in df.columns:
        df["category"] = df["category_hint"]
    elif "category_hint" in df.columns:
        df["category"] = df["category"].fillna(df["category_hint"])

    df = df[[c for c in cols if c in df.columns]]

print("\n=== Summary by category ===")
if len(df) == 0:
    print("No validated issues found.")
else:
    display(df.sort_values(["category", "severity"], ascending=[True, False]))

    summary = df.groupby("category").size().sort_values(ascending=False)
    print(summary)

print("\n=== Suggested rewrites (top 15) ===")
if len(df) > 0:
    for _, row in df.head(15).iterrows():
        print(f"- '{row['phrase']}' → {row.get('suggested_rewrite','')}")