In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
sns.set_theme()
sns.set_palette(sns.color_palette("pastel"))
%matplotlib inline

### Import data

In [None]:
# Load the three per-county synthetic datasets; the paths are relative to the
# notebook's working directory.
df_c1 = pd.read_csv('C1. copiah_county_synthetic_data.csv')
df_c2 = pd.read_csv('C2. claiborne_county_synthetic_data.csv')
df_c3 = pd.read_csv('C3. warren_county_synthetic_data.csv')

In [None]:
# Schema check before concatenation. Index.equals gives a single True/False and
# copes with differing column counts, whereas `==` compares element-wise and
# raises when the two indexes have different lengths.
df_c1.columns.equals(df_c2.columns)

In [None]:
# Schema check before concatenation. Index.equals gives a single True/False and
# copes with differing column counts, whereas `==` compares element-wise and
# raises when the two indexes have different lengths.
df_c1.columns.equals(df_c3.columns)

In [None]:
df_c1.head()

In [None]:
df_c2.head()

In [None]:
df_c1.describe()

In [None]:
df_c2.describe()

In [None]:
df_c3.describe()

### Preprocessing

In [None]:
# Stack the three county frames into one analysis frame. ignore_index=True
# gives every row a unique 0..n-1 label; without it each file's own index is
# repeated, and duplicate labels can break index-aligned operations later on.
df = pd.concat([df_c1, df_c2, df_c3], ignore_index=True)

In [None]:
df.shape

In [None]:
df['Age'].describe()

In [None]:
# Apply to dataframe
def age_bins(x):
    """Map a numeric age to its age-bracket label.

    Parameters
    ----------
    x : int or float
        Age in years.

    Returns
    -------
    str or float
        One of "18-25", "26-35", "36-45", "46-55", "56-65" or "66+".
        Any age of 25 or below (including values under 18) maps to "18-25".
        A missing age (NaN / None) returns NaN instead of a bracket label.
    """
    # Every `<=` comparison against NaN is False, so without this guard a
    # missing age would fall through to the final "66+" bracket.
    if pd.isna(x):
        return np.nan

    # (inclusive upper bound, label) pairs, checked in ascending order.
    brackets = (
        (25, "18-25"),
        (35, "26-35"),
        (45, "36-45"),
        (55, "46-55"),
        (65, "56-65"),
    )
    for upper, label in brackets:
        if x <= upper:
            return label
    return "66+"

    
# Derive the age-bracket label for every row from its numeric Age.
df['Age_Group'] = df['Age'].apply(age_bins)

In [None]:
# Number of non-null risk scores recorded per race.
df.groupby('Race')[['Risk Score']].count()

In [None]:
# Number of non-null risk scores recorded per gender.
df.groupby('Gender')[['Risk Score']].count()

In [None]:
# Number of non-null risk scores recorded per education level.
df.groupby('Education Level')[['Risk Score']].count()

---
### 1. Analyze the demographic profiles of three counties:

1. Evaluate and visualize the demographic characteristics (race, gender, and education) in each county using pivot tables (Excel) or Pandas (Python).
2. Deliverable: A summary of demographic data visualized through pie or bar charts and a comparison of racial and gender distributions across counties.

In [None]:
# Count of defendants per race, one facet per county.
sns.displot(df, x='Race', col='County', height=3, hue='Race', alpha=0.8)
# Render the figure; unlike plt.plot(), plt.show() returns nothing, so no
# stray value is echoed as the cell output.
plt.show()

In [None]:
# Count of defendants per gender, one facet per county.
sns.displot(df, x='Gender', col='County', height=3, hue='Gender', alpha=0.8)
# Render the figure; unlike plt.plot(), plt.show() returns nothing, so no
# stray value is echoed as the cell output.
plt.show()

In [None]:
# Education levels from lowest to highest; this list defines the categories of
# the column and the hue/legend order of the plot.
col_order = ['Less than High School', 'High School', 'Some College', "Bachelor's Degree", "Master's Degree", 'PhD']
# Any value that is not listed in col_order becomes NaN in the categorical column.
df['Education Level'] = pd.Categorical(df['Education Level'], col_order)

# Count of defendants per education level, one facet per county. The x tick
# labels are blanked because the legend already names every level. No
# row_order is passed: the grid has no `row` variable for it to act on.
sns.displot(df, x='Education Level', col='County', height=3, hue='Education Level', alpha=0.8, hue_order=col_order).set_xticklabels("")
plt.show()

---
### 2. Evaluate risk scores across demographic groups:

1. Analyze the distribution of risk scores by race and gender within each county.
2. Deliverable: A summary table showing average risk scores for each demographic group, with accompanying visualizations (e.g., bar charts).
3. Provide an analysis of how risk scores vary across counties and demographic groups.

In [None]:
# Summary statistics of the risk score within each race.
df.groupby('Race')[['Risk Score']].describe()

In [None]:
# Summary statistics of the risk score within each gender.
df.groupby('Gender')[['Risk Score']].describe()

In [None]:
# Horizontal box plot of risk score per race across all counties combined.
sns.boxplot(data=df, x='Risk Score', y='Race', hue='Race')

In [None]:
# Create FacetGrid
# Fix one race order for every facet. Each facet only receives its own county's
# rows, so without an explicit order the boxes could be arranged differently
# per county while all facets share a single set of y tick labels.
race_order = list(df['Race'].dropna().unique())

# One facet per county
g = sns.FacetGrid(df, col='County', height=4, aspect=1.5)

# Horizontal risk-score box plot per race in each facet
g.map_dataframe(
    sns.boxplot,
    x='Risk Score',
    y='Race',
    order=race_order
)

# Adjust the layout
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
# Horizontal box plot of risk score per gender across all counties combined.
sns.boxplot(data=df, x='Risk Score', y='Gender', hue='Gender')

In [None]:
# Create FacetGrid
# Fix one gender order for every facet. Each facet only receives its own
# county's rows, so without an explicit order the boxes could be arranged
# differently per county while all facets share a single set of y tick labels.
gender_order = list(df['Gender'].dropna().unique())

# One facet per county
g = sns.FacetGrid(df, col='County', height=4, aspect=1.5)

# Horizontal risk-score box plot per gender in each facet
g.map_dataframe(
    sns.boxplot,
    x='Risk Score',
    y='Gender',
    order=gender_order
)

# Adjust the layout
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
# Create FacetGrid
# One facet per county
g = sns.FacetGrid(df, col='County', height=3, aspect=1.5)

# Map a histogram (with KDE overlay) to the grid. The bin edges sit at x.5 so
# each unit-wide bar is centred on an integer risk score.
g.map_dataframe(
    sns.histplot,
    x='Risk Score',
    bins=np.arange(0.5, 11.5, 1),
    alpha=0.8,
    kde=True,
    kde_kws={
        'bw_adjust': 1.5,
        'cut': 0,
    }
)

# Integer ticks on every facet, set through the grid rather than through
# pyplot's "current axes" (which is only the last facet drawn).
g.set(xticks=range(11))

# Adjust the layout
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
# Risk-score histogram split by race: the bars of the different races sit side
# by side ("dodge") in unit-wide bins centred on the integer scores, with one
# KDE curve per race.
race_risk_hist = sns.histplot(
    data=df,
    x='Risk Score',
    hue='Race',
    bins=np.arange(0.5, 11.5, 1),
    multiple="dodge",
    element='bars',
    alpha=0.8,
    edgecolor='white',
    linewidth=1,
    kde=True,
    kde_kws={'bw_adjust': 1.5, 'cut': 0},
)

# Integer ticks 0..10 on the x axis.
race_risk_hist.set_xticks(list(range(11)))
plt.tight_layout()
plt.show()

---
### 3. Compare judge decisions to AI risk scores:

1. Investigate the alignment between judges’ bail decisions and AI-generated risk scores across racial and gender groups.
2. Deliverable: Cross-tabulate risk scores and judge decisions using stacked bar charts to evaluate consistency. Discuss patterns of bias in decision-making, highlighting any discrepancies between groups.

In [None]:
# Overview: case counts per (race, judge decision) row and risk-score column.
pd.crosstab(index=[df['Race'], df['Judge Decision']], columns=df['Risk Score'])

In [None]:
# Overview: case counts per (gender, judge decision) row and risk-score column.
pd.crosstab(index=[df['Gender'], df['Judge Decision']], columns=df['Risk Score'])

In [None]:
# Case counts per (county, race, judge decision) row and risk-score column,
# without margin totals.
race_pivot = pd.crosstab(
    index=[df['County'], df['Race'], df['Judge Decision']],
    columns=df['Risk Score'],
)
race_pivot

In [None]:
# Case counts per (county, gender, judge decision) row and risk-score column,
# without margin totals.
gender_pivot = pd.crosstab(
    index=[df['County'], df['Gender'], df['Judge Decision']],
    columns=df['Risk Score'],
)
gender_pivot

In [None]:
# Heatmaps of case counts, one panel per county: rows are race x judge decision,
# columns are risk scores. Expects race_pivot to be the
# (County, Race, Judge Decision) x Risk Score crosstab.
county_groups = list(race_pivot.groupby(level=0))

# One subplot row per county actually present (5 inches of height each).
# squeeze=False + ravel() keeps `axes` a 1-D array even for a single county.
fig, axes = plt.subplots(len(county_groups), 1, figsize=(12, 5 * len(county_groups)), squeeze=False)
axes = axes.ravel()

# Color scheme
cmap = 'YlOrRd'  # or 'RdYlBu_r' for a diverging colormap

# Process each county
for ax, (county, county_data) in zip(axes, county_groups):
    # Flatten the index and build a "Race\n(Decision)" row label
    county_matrix = county_data.reset_index()
    county_matrix['Group'] = county_matrix['Race'] + '\n(' + county_matrix['Judge Decision'] + ')'
    # After set_index the first three columns are County, Race and
    # Judge Decision; everything from position 3 on is a risk-score count.
    heatmap_matrix = county_matrix.set_index('Group').iloc[:, 3:]

    # Create heatmap
    sns.heatmap(heatmap_matrix,
                cmap=cmap,
                annot=True,
                fmt='g',
                cbar_kws={'label': 'Count'},
                ax=ax,
                square=False,
                robust=True,
                annot_kws={'size': 9},
                linewidths=0.5,
                linecolor='white')

    # Add a horizontal line below each racial group. sort=False keeps the
    # group sizes in row order, so the running total is each group's last row.
    race_sizes = county_matrix.groupby('Race', sort=False).size()
    for boundary in race_sizes.cumsum():
        ax.axhline(y=boundary, color='black', linewidth=2)

    # Customize subplot
    ax.set_title(f'{county}', pad=10, fontsize=12)
    ax.set_xlabel('Risk Score')
    ax.set_ylabel('')

    # Keep tick labels horizontal for better readability
    ax.tick_params(axis='both', labelrotation=0)

# Adjust layout
plt.tight_layout()
plt.show()

In [None]:
# Judge decisions per (county, race) with 'All' margin totals, plus each
# decision's share of the row total in percent (one decimal).
# NOTE: this rebinds race_pivot, which the county-heatmap cell reads in its
# earlier (County, Race, Judge Decision) x Risk Score layout.
race_pivot = pd.crosstab(
    index=[df['County'], df['Race']],
    columns=df['Judge Decision'],
    margins=True,
)
race_pivot['% Denied'] = race_pivot['Denied'].div(race_pivot['All']).mul(100).round(1)
race_pivot['% Granted'] = race_pivot['Granted'].div(race_pivot['All']).mul(100).round(1)
race_pivot

In [None]:
# Judge decisions per (county, gender) with 'All' margin totals, plus each
# decision's share of the row total in percent (one decimal).
gender_pivot = pd.crosstab(
    index=[df['County'], df['Gender']],
    columns=df['Judge Decision'],
    margins=True,
)
gender_pivot['% Denied'] = gender_pivot['Denied'].div(gender_pivot['All']).mul(100).round(1)
gender_pivot['% Granted'] = gender_pivot['Granted'].div(gender_pivot['All']).mul(100).round(1)
gender_pivot

In [None]:
# Percentage split of judge decisions at each risk score, one stacked-bar
# panel per race.
races = df['Race'].dropna().unique()

# One subplot row per race actually present; squeeze=False + ravel() keeps
# `axes` a 1-D array even when there is a single race.
fig, axes = plt.subplots(len(races), 1, figsize=(12, 10), squeeze=False)
axes = axes.ravel()

for ax, race in zip(axes, races):
    race_data = df[df['Race'] == race]
    # normalize='index' turns each risk-score row into shares that sum to 1
    ct = pd.crosstab(race_data['Risk Score'],
                     race_data['Judge Decision'],
                     normalize='index') * 100

    # Show every risk score 1-10 on the x axis; scores without any cases for
    # this race get 0 % bars.
    ct = ct.reindex(range(1, 11), fill_value=0)

    # Plot directly to the specific axis
    ct.plot(kind='bar',
            stacked=True,
            ax=ax,
            title=f'Judge Decisions by Risk Score - {race}')

    # Customize each subplot
    ax.set_xlabel('Risk Score')
    ax.set_ylabel('Percentage of Decisions')
    ax.legend(title='Decision', bbox_to_anchor=(1.05, 1))

# A single tight_layout / show for the whole figure
plt.tight_layout()
plt.show()

In [None]:
# Percentage split of judge decisions at each risk score, one stacked-bar
# panel per gender.
genders = df['Gender'].dropna().unique()

# One subplot row per gender actually present; squeeze=False + ravel() keeps
# `axes` a 1-D array even when there is a single gender.
fig, axes = plt.subplots(len(genders), 1, figsize=(8, 10), squeeze=False)
axes = axes.ravel()

for ax, gender in zip(axes, genders):
    gender_data = df[df['Gender'] == gender]
    # normalize='index' turns each risk-score row into shares that sum to 1
    ct = pd.crosstab(gender_data['Risk Score'],
                     gender_data['Judge Decision'],
                     normalize='index') * 100

    # Show every risk score 1-10 on the x axis (as the per-race panels do), so
    # all panels share the same x positions; empty scores get 0 % bars.
    ct = ct.reindex(range(1, 11), fill_value=0)

    # Plot directly to the specific axis
    ct.plot(kind='bar',
            stacked=True,
            ax=ax,
            title=f'Judge Decisions by Risk Score - {gender}')

    # Customize each subplot
    ax.set_xlabel('Risk Score')
    ax.set_ylabel('Percentage of Decisions')
    ax.legend(title='Decision', bbox_to_anchor=(1.05, 1))

# A single tight_layout / show for the whole figure
plt.tight_layout()
plt.show()

In [None]:
# Risk-score distribution per race, split by the judge's decision.
sns.boxplot(data=df, x='Race', y='Risk Score', hue='Judge Decision')
plt.title('Risk Score Distribution by Race and Decision')
plt.xlabel('Race')
plt.ylabel('Risk Score')
# Render the figure; plt.show() returns nothing, so no stray value is echoed.
plt.show()

In [None]:
# Risk-score distribution per gender, split by the judge's decision.
sns.boxplot(data=df, x='Gender', y='Risk Score', hue='Judge Decision')
plt.title('Risk Score Distribution by Gender and Decision')
plt.xlabel('Gender')
plt.ylabel('Risk Score')
# Render the figure; plt.show() returns nothing, so no stray value is echoed.
plt.show()

### 

---
### Additional analysis
Testing re-offense against risk score and judge bail decisions per race and gender. 

In [None]:
# Re-offense outcomes per (county, gender) with 'All' margin totals, plus each
# outcome's share of the row total in percent (one decimal).
gender_pivot_reoffense = pd.crosstab(
    index=[df['County'], df['Gender']],
    columns=df['Re-offense'],
    margins=True,
)
gender_pivot_reoffense['% No'] = gender_pivot_reoffense['No'].div(gender_pivot_reoffense['All']).mul(100).round(1)
gender_pivot_reoffense['% Yes'] = gender_pivot_reoffense['Yes'].div(gender_pivot_reoffense['All']).mul(100).round(1)
gender_pivot_reoffense

In [None]:
# Re-offense outcomes per (county, race) with 'All' margin totals, plus each
# outcome's share of the row total in percent (one decimal).
race_pivot_reoffense = pd.crosstab(
    index=[df['County'], df['Race']],
    columns=df['Re-offense'],
    margins=True,
)
race_pivot_reoffense['% No'] = race_pivot_reoffense['No'].div(race_pivot_reoffense['All']).mul(100).round(1)
race_pivot_reoffense['% Yes'] = race_pivot_reoffense['Yes'].div(race_pivot_reoffense['All']).mul(100).round(1)
race_pivot_reoffense

In [None]:
# Re-offense outcomes per (race, risk score) with 'All' margin totals, plus
# each outcome's share of the row total in percent (one decimal).
race_pivot_score_reoffense = pd.crosstab(
    index=[df['Race'], df['Risk Score']],
    columns=df['Re-offense'],
    margins=True,
)
race_pivot_score_reoffense['% No'] = race_pivot_score_reoffense['No'].div(race_pivot_score_reoffense['All']).mul(100).round(1)
race_pivot_score_reoffense['% Yes'] = race_pivot_score_reoffense['Yes'].div(race_pivot_score_reoffense['All']).mul(100).round(1)
race_pivot_score_reoffense

In [None]:
# Re-offense outcomes per (gender, risk score) with 'All' margin totals, plus
# each outcome's share of the row total in percent (one decimal).
gender_pivot_score_reoffense = pd.crosstab(
    index=[df['Gender'], df['Risk Score']],
    columns=df['Re-offense'],
    margins=True,
)
gender_pivot_score_reoffense['% No'] = gender_pivot_score_reoffense['No'].div(gender_pivot_score_reoffense['All']).mul(100).round(1)
gender_pivot_score_reoffense['% Yes'] = gender_pivot_score_reoffense['Yes'].div(gender_pivot_score_reoffense['All']).mul(100).round(1)
gender_pivot_score_reoffense

In [None]:
# Re-offense outcomes per (race, judge decision) with 'All' margin totals,
# plus each outcome's share of the row total in percent (one decimal).
race_pivot_judge_reoffense = pd.crosstab(
    index=[df['Race'], df['Judge Decision']],
    columns=df['Re-offense'],
    margins=True,
)
race_pivot_judge_reoffense['% No'] = race_pivot_judge_reoffense['No'].div(race_pivot_judge_reoffense['All']).mul(100).round(1)
race_pivot_judge_reoffense['% Yes'] = race_pivot_judge_reoffense['Yes'].div(race_pivot_judge_reoffense['All']).mul(100).round(1)
race_pivot_judge_reoffense

In [None]:
# Re-offense outcomes per (gender, judge decision) with 'All' margin totals,
# plus each outcome's share of the row total in percent (one decimal).
gender_pivot_judge_reoffense = pd.crosstab(
    index=[df['Gender'], df['Judge Decision']],
    columns=df['Re-offense'],
    margins=True,
)
gender_pivot_judge_reoffense['% No'] = gender_pivot_judge_reoffense['No'].div(gender_pivot_judge_reoffense['All']).mul(100).round(1)
gender_pivot_judge_reoffense['% Yes'] = gender_pivot_judge_reoffense['Yes'].div(gender_pivot_judge_reoffense['All']).mul(100).round(1)
gender_pivot_judge_reoffense

In [None]:
# Risk-score distribution per gender, split by re-offense outcome.
sns.boxplot(data=df, x='Gender', y='Risk Score', hue='Re-offense')
plt.title('Risk Score Distribution by Gender and Re-Offense')
plt.xlabel('Gender')
plt.ylabel('Risk Score')
# Render the figure; plt.show() returns nothing, so no stray value is echoed.
plt.show()

In [None]:
# Risk-score distribution per race, split by re-offense outcome.
sns.boxplot(data=df, x='Race', y='Risk Score', hue='Re-offense')
plt.title('Risk Score Distribution by Race and Re-Offense')
plt.xlabel('Race')
plt.ylabel('Risk Score')
# Render the figure; plt.show() returns nothing, so no stray value is echoed.
plt.show()

---
### 4A. Analyze re-offense rates and fairness metrics (judge decisions as the predictor):
1. Calculate re-offense rates and key fairness metrics, including False Positive Rates (FPR) and False Negative Rates (FNR), for each racial group.
2. Deliverable: Bar charts comparing FPR and FNR across racial groups, accompanied by an interpretation of any disparities found.

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Copy the Race / Judge Decision / Re-offense columns to the system clipboard.
# NOTE(review): this needs a clipboard backend and may fail on a headless
# kernel, interrupting "Run All" — confirm the export is still wanted.
df[['Race', 'Judge Decision', 'Re-offense']].to_clipboard()

In [None]:
# Label encoding for the judge-decision fairness analysis:
#   0 = bail denied  / re-offended
#   1 = bail granted / did not re-offend
# Re-offense is treated as the ground truth, and a denial is read as the
# judge "predicting" a re-offense.

# Prediction: 1 only where the decision is exactly "Granted"
y_pred = (df['Judge Decision'] == "Granted").astype('int64')

# Ground truth: 0 only where Re-offense is exactly "Yes"
y = (df['Re-offense'] != "Yes").astype('int64')


In [None]:
# Per-race confusion-matrix metrics for judge decisions (y_pred) against the
# observed re-offense outcome (y). The positive class (1) is "bail granted /
# no re-offense", so FPR is the share of re-offenders who were granted bail
# and FNR is the share of non-re-offenders who were denied bail.
results_dict_4a = {}

for race in df['Race'].unique():
    in_group = df['Race'] == race

    # labels=[0, 1] forces a 2x2 matrix even when a group contains only one
    # class; otherwise ravel() would not yield the four values unpacked here.
    tn, fp, fn, tp = confusion_matrix(y[in_group], y_pred[in_group], labels=[0, 1]).ravel()
    n_group = tp + fp + fn + tn

    results_dict_4a[race] = {
        'FPR': round(fp / (fp + tn), 3),                  # fall-out
        'FNR': round(fn / (tp + fn), 3),                  # miss rate
        'PPV': round(tp / (tp + fp), 3),                  # precision
        'TPR': round(tp / (tp + fn), 3),                  # recall
        'Acc': round((tp + tn) / n_group, 3),             # accuracy
        'Negative Rate': round((tn + fn) / n_group, 3),   # share predicted 0
        'Positive Rate': round((tp + fp) / n_group, 3),   # share predicted 1
        'Group Population': n_group,
        'Group TN': tn,
        'Group TP': tp,
        'Group FN': fn,
        'Group FP': fp,
    }


In [None]:
# Tabulate the per-race metrics: one column per race, one row per metric.
metrics_df_4a = pd.DataFrame.from_dict(results_dict_4a)
metrics_df_4a

In [None]:
# Disparity ratios with White as the reference group: each group's metric is
# divided by the White group's value for the same metric, so the White column
# is 1.0 by construction.
disparity_white = pd.DataFrame(
    {
        'White': [1.0] * 6,
        'Black': (metrics_df_4a['Black'] / metrics_df_4a['White']).round(3),
        'Other': (metrics_df_4a['Other'] / metrics_df_4a['White']).round(3),
    },
    # Only these six rate metrics are kept; the ratio Series are aligned to them.
    index=['FPR', 'FNR', 'PPV', 'TPR', 'Positive Rate', 'Negative Rate'],
)

disparity_white

In [None]:
# Disparity ratios with Black as the reference group: each group's metric is
# divided by the Black group's value for the same metric, so the Black column
# is 1.0 by construction.
disparity_black = pd.DataFrame(
    {
        'White': (metrics_df_4a['White'] / metrics_df_4a['Black']).round(3),
        'Black': [1.0] * 6,
        'Other': (metrics_df_4a['Other'] / metrics_df_4a['Black']).round(3),
    },
    # Only these six rate metrics are kept; the ratio Series are aligned to them.
    index=['FPR', 'FNR', 'PPV', 'TPR', 'Positive Rate', 'Negative Rate'],
)

disparity_black

In [None]:
# Disparity ratios with Other as the reference group: each group's metric is
# divided by the Other group's value for the same metric, so the Other column
# is 1.0 by construction.
disparity_other = pd.DataFrame(
    {
        'White': (metrics_df_4a['White'] / metrics_df_4a['Other']).round(3),
        'Black': (metrics_df_4a['Black'] / metrics_df_4a['Other']).round(3),
        'Other': [1.0] * 6,
    },
    # Only these six rate metrics are kept; the ratio Series are aligned to them.
    index=['FPR', 'FNR', 'PPV', 'TPR', 'Positive Rate', 'Negative Rate'],
)

disparity_other

In [None]:
# Grouped bars: one group per race, one bar each for FPR and FNR.
metrics_df_4a.loc[['FPR', 'FNR']].T.plot.bar()

In [None]:
metrics_df_4a.T.reset_index()

---
### 4B. Analyze re-offense rates and fairness metrics (AI risk scores as the predictor):
1. Calculate re-offense rates and key fairness metrics, including False Positive Rates (FPR) and False Negative Rates (FNR), for each racial group.
2. Deliverable: Bar charts comparing FPR and FNR across racial groups, accompanied by an interpretation of any disparities found.

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Label encoding for the risk-score fairness analysis:
#   ground truth (y_4b):    0 = re-offended, 1 = did not re-offend
#   prediction (y_pred_4b): 1 = risk score <= risk_threshold (positive outcome)
#                           0 = risk score >  risk_threshold (negative outcome)

risk_threshold = 5 # can be changed to check different metrics
y_pred_4b = (df['Risk Score'] <= risk_threshold).astype('int64')

# Ground truth: 0 only where Re-offense is exactly "Yes"
y_4b = (df['Re-offense'] != "Yes").astype('int64')


In [None]:
# Per-race confusion-matrix metrics for the thresholded risk score (y_pred_4b)
# against the observed re-offense outcome (y_4b). The positive class (1) is
# "low risk / no re-offense", so FPR is the share of re-offenders scored at or
# below the threshold and FNR is the share of non-re-offenders scored above it.
results_dict_4b = {}

for race in df['Race'].unique():
    in_group = df['Race'] == race

    # labels=[0, 1] forces a 2x2 matrix even when a group contains only one
    # class; otherwise ravel() would not yield the four values unpacked here.
    tn, fp, fn, tp = confusion_matrix(y_4b[in_group], y_pred_4b[in_group], labels=[0, 1]).ravel()
    n_group = tp + fp + fn + tn

    results_dict_4b[race] = {
        'FPR': round(fp / (fp + tn), 3),                  # fall-out
        'FNR': round(fn / (tp + fn), 3),                  # miss rate
        'PPV': round(tp / (tp + fp), 3),                  # precision
        'TPR': round(tp / (tp + fn), 3),                  # recall
        'Acc': round((tp + tn) / n_group, 3),             # accuracy
        'Negative Rate': round((tn + fn) / n_group, 3),   # share predicted 0
        'Positive Rate': round((tp + fp) / n_group, 3),   # share predicted 1
        'Group Population': n_group,
        'Group TN': tn,
        'Group TP': tp,
        'Group FN': fn,
        'Group FP': fp,
    }

In [None]:
# Tabulate the per-race metrics: one column per race, one row per metric.
metrics_df_4b = pd.DataFrame.from_dict(results_dict_4b)
metrics_df_4b

In [None]:
# Disparity ratios with White as the reference group: each group's metric is
# divided by the White group's value for the same metric, so the White column
# is 1.0 by construction.
disparity_white = pd.DataFrame(
    {
        'White': [1.0] * 6,
        'Black': (metrics_df_4b['Black'] / metrics_df_4b['White']).round(3),
        'Other': (metrics_df_4b['Other'] / metrics_df_4b['White']).round(3),
    },
    # Only these six rate metrics are kept; the ratio Series are aligned to them.
    index=['FPR', 'FNR', 'PPV', 'TPR', 'Positive Rate', 'Negative Rate'],
)

disparity_white

In [None]:
# Disparity ratios with Black as the reference group: each group's metric is
# divided by the Black group's value for the same metric, so the Black column
# is 1.0 by construction.
disparity_black = pd.DataFrame(
    {
        'White': (metrics_df_4b['White'] / metrics_df_4b['Black']).round(3),
        'Black': [1.0] * 6,
        'Other': (metrics_df_4b['Other'] / metrics_df_4b['Black']).round(3),
    },
    # Only these six rate metrics are kept; the ratio Series are aligned to them.
    index=['FPR', 'FNR', 'PPV', 'TPR', 'Positive Rate', 'Negative Rate'],
)

disparity_black

In [None]:
# Disparity ratios with Other as the reference group: each group's metric is
# divided by the Other group's value for the same metric, so the Other column
# is 1.0 by construction.
disparity_other = pd.DataFrame(
    {
        'White': (metrics_df_4b['White'] / metrics_df_4b['Other']).round(3),
        'Black': (metrics_df_4b['Black'] / metrics_df_4b['Other']).round(3),
        'Other': [1.0] * 6,
    },
    # Only these six rate metrics are kept; the ratio Series are aligned to them.
    index=['FPR', 'FNR', 'PPV', 'TPR', 'Positive Rate', 'Negative Rate'],
)

disparity_other

In [None]:
# Grouped bars: one group per race, one bar each for FPR and FNR.
metrics_df_4b.loc[['FPR', 'FNR']].T.plot.bar()

In [None]:
metrics_df_4b.T.reset_index()