In [1]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# COMPAS Fairness Audit using AI Fairness 360\n",
    "## Analyzing Racial Bias in Recidivism Risk Scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import necessary libraries\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from aif360.datasets import BinaryLabelDataset\n",
    "from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 1: Load and Preprocess the COMPAS Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the raw file into its own variable so this cell is idempotent on re-run\n",
    "compas_raw = pd.read_csv('compas-scores.csv')\n",
    "\n",
    "# Select relevant columns and drop rows with missing values\n",
    "df = compas_raw[['race', 'two_year_recid', 'score_text']].dropna().copy()\n",
    "\n",
    "# Convert score_text to binary: Low = 0 (no recidivism), Medium/High = 1 (recidivism).\n",
    "# Any value outside these three categories maps to NaN, so drop those rows afterwards.\n",
    "df['score'] = df['score_text'].map({'Low': 0, 'Medium': 1, 'High': 1})\n",
    "df = df.dropna(subset=['score'])\n",
    "df['score'] = df['score'].astype(int)\n",
    "\n",
    "# Encode the protected attribute as 1 = Caucasian, 0 = Non-Caucasian, which is the\n",
    "# encoding the group definitions in the fairness-metric step rely on.\n",
    "df['race'] = (df['race'] == 'Caucasian').astype(int)\n",
    "\n",
    "# AIF360 datasets need an all-numeric frame, so the text column cannot stay\n",
    "df = df.drop(columns=['score_text'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2: Convert to AIF360 Dataset Format\n",
    "We'll define `race` as the protected attribute and `score` as the outcome."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wrap the frame as an AIF360 BinaryLabelDataset.\n",
    "# `score` is the label and `race` the protected attribute; a score of 0 is the\n",
    "# favorable outcome and 1 the unfavorable one.\n",
    "dataset = BinaryLabelDataset(\n",
    "    favorable_label=0,\n",
    "    unfavorable_label=1,\n",
    "    df=df,\n",
    "    label_names=['score'],\n",
    "    protected_attribute_names=['race'],\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 3: Train-Test Split and Model Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fix the seed so the split, the model and every metric below are reproducible\n",
    "SEED = 42\n",
    "\n",
    "# Split dataset: 70% train / 30% test, shuffled deterministically\n",
    "train, test = dataset.split([0.7], shuffle=True, seed=SEED)\n",
    "\n",
    "# Train a simple model\n",
    "# NOTE(review): AIF360 keeps protected attributes inside `features`, so `race` is a\n",
    "# model input here -- confirm that is intended for this audit.\n",
    "model = RandomForestClassifier(random_state=SEED)\n",
    "model.fit(train.features, train.labels.ravel())\n",
    "\n",
    "# Predict on test set\n",
    "preds = model.predict(test.features)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 4: Evaluate Fairness Metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Disparate Impact: 0.72\n",
      "Statistical Parity Difference: -0.18\n",
      "False Positive Rate Difference: 0.18\n"
     ]
    }
   ],
   "source": [
    "# Define privileged and unprivileged groups\n",
    "# We assume 'race' is encoded as 1 = Caucasian, 0 = Non-Caucasian\n",
    "priv_group = [{'race': 1}]\n",
    "unpriv_group = [{'race': 0}]\n",
    "\n",
    "# Dataset-level metrics, computed on the labels stored in the test split\n",
    "metric = BinaryLabelDatasetMetric(test, privileged_groups=priv_group, unprivileged_groups=unpriv_group)\n",
    "print(\"Disparate Impact:\", round(metric.disparate_impact(), 2))\n",
    "print(\"Statistical Parity Difference:\", round(metric.statistical_parity_difference(), 2))\n",
    "\n",
    "# Classification metrics compare the test labels with the model's predictions.\n",
    "# The classified dataset must match `test` in everything except labels/scores, so\n",
    "# copy `test` and overwrite its labels with `preds` rather than passing `dataset`.\n",
    "test_pred = test.copy(deepcopy=True)\n",
    "test_pred.labels = preds.reshape(-1, 1)\n",
    "cls_metric = ClassificationMetric(test, test_pred, unprivileged_groups=unpriv_group, privileged_groups=priv_group)\n",
    "\n",
    "# NOTE: AIF360 treats the favorable label as the 'positive' class; it is 0 when the\n",
    "# dataset is built with favorable_label=0, so read the rate below accordingly.\n",
    "print(\"False Positive Rate Difference:\", round(cls_metric.false_positive_rate_difference(), 2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 5: Visualize Disparities"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZAAAAGQCAY..."
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-group false positive rates: privileged group first, then unprivileged\n",
    "labels = ['Caucasian', 'Non-Caucasian']\n",
    "fpr = [cls_metric.false_positive_rate(privileged=True), cls_metric.false_positive_rate(privileged=False)]\n",
    "\n",
    "# Keep the 0-0.5 view for small rates, but grow the axis when a bar would be clipped\n",
    "y_max = max(0.5, 1.1 * max(fpr))\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(6, 4))\n",
    "ax.bar(labels, fpr, color=['#4E79A7', '#F28E2B'])\n",
    "ax.set_ylabel('False Positive Rate')\n",
    "ax.set_title('False Positive Rates by Race')\n",
    "ax.set_ylim(0, y_max)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 6: Summary of Findings and Remediation Steps (300 words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}

NameError: name 'null' is not defined