## County Level Analysis

#### Import Libraries

In [166]:
import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline 
import matplotlib.pyplot as plt
import plotly.express as px

#### Creating Dataframe for each county

In [167]:
# Load the Claiborne County records, restricted to the columns this analysis uses.
# index_col=False keeps pandas from promoting the first CSV column to the index.
clairborne_df = pd.read_csv(
    '../Data/Claiborne_county_synthetic_data.csv',
    index_col=False,
    usecols=[
        "Race",
        "Age",
        "Gender",
        "Education Level",
        "Employment Status",
        "Income Level",
        "Prior Convictions",
        "Risk Score",
        "Judge Decision",
        "Re-offense",
    ],
)
clairborne_df.head()

Unnamed: 0,Race,Age,Gender,Education Level,Employment Status,Income Level,Prior Convictions,Risk Score,Judge Decision,Re-offense
0,Black,62,Male,Bachelor's Degree,Employed,34165.969893,5,1.949816,1,0
1,Black,48,Male,Bachelor's Degree,Employed,73102.340993,3,7.631,0,0
2,Black,44,Male,High School,Unemployed,78046.909354,2,6.278393,1,0
3,Black,63,Female,Bachelor's Degree,Employed,121869.327498,1,5.098658,0,0
4,Black,53,Female,Bachelor's Degree,Employed,73289.622946,2,3.724815,1,0


In [168]:
# Load the Warren County records, restricted to the columns this analysis uses.
# index_col=False keeps pandas from promoting the first CSV column to the index.
warren_df = pd.read_csv(
    '../Data/Warren_county_synthetic_data.csv',
    index_col=False,
    usecols=[
        "Race",
        "Age",
        "Gender",
        "Education Level",
        "Employment Status",
        "Income Level",
        "Prior Convictions",
        "Risk Score",
        "Judge Decision",
        "Re-offense",
    ],
)
warren_df.head()

Unnamed: 0,Race,Age,Gender,Education Level,Employment Status,Income Level,Prior Convictions,Risk Score,Judge Decision,Re-offense
0,White,18,Male,High School,Unemployed,34165.969893,1,3.899632,1,0
1,Black,37,Male,High School,Employed,34114.425797,2,7.631,0,0
2,White,49,Male,Less than High School,Unemployed,36421.891032,0,6.278393,0,0
3,White,33,Female,Less than High School,Employed,121869.327498,1,1.019732,1,0
4,Black,19,Male,Bachelor's Degree,Employed,73289.622946,1,5.587222,0,0


In [169]:
# Load the Copiah County records, restricted to the columns this analysis uses.
# index_col=False keeps pandas from promoting the first CSV column to the index.
copiah_df = pd.read_csv(
    '../Data/Copiah_county_synthetic_data.csv',
    index_col=False,
    usecols=[
        "Race",
        "Age",
        "Gender",
        "Education Level",
        "Employment Status",
        "Income Level",
        "Prior Convictions",
        "Risk Score",
        "Judge Decision",
        "Re-offense",
    ],
)
copiah_df.head()

Unnamed: 0,Race,Age,Gender,Education Level,Employment Status,Income Level,Prior Convictions,Risk Score,Judge Decision,Re-offense
0,White,48,Female,Bachelor's Degree,Employed,34165.969893,1,8.774172,0,1
1,Black,70,Male,High School,Unemployed,34114.425797,1,8.721143,0,1
2,White,73,Male,High School,Employed,15609.381871,1,7.324792,1,0
3,White,26,Male,Some College,Unemployed,34123.411699,0,6.11839,1,0
4,White,38,Female,Master's Degree,Employed,73289.622946,0,2.793611,1,0


In [170]:
# Tag every per-county frame with its county name (written onto the frames themselves,
# so re-running this cell simply overwrites the same column).
for county_name, county_frame in (
    ('Claiborne', clairborne_df),
    ('Warren', warren_df),
    ('Copiah', copiah_df),
):
    county_frame['County'] = county_name

# Stack the three counties into one frame with a fresh 0..n-1 index
combined_df = pd.concat(
    [clairborne_df, warren_df, copiah_df],
    ignore_index=True,
)

# Preview the combined data
combined_df.head()

Unnamed: 0,Race,Age,Gender,Education Level,Employment Status,Income Level,Prior Convictions,Risk Score,Judge Decision,Re-offense,County
0,Black,62,Male,Bachelor's Degree,Employed,34165.969893,5,1.949816,1,0,Claiborne
1,Black,48,Male,Bachelor's Degree,Employed,73102.340993,3,7.631,0,0,Claiborne
2,Black,44,Male,High School,Unemployed,78046.909354,2,6.278393,1,0,Claiborne
3,Black,63,Female,Bachelor's Degree,Employed,121869.327498,1,5.098658,0,0,Claiborne
4,Black,53,Female,Bachelor's Degree,Employed,73289.622946,2,3.724815,1,0,Claiborne


In [171]:
# Bin the continuous Risk Score into four ordered bands.
# With right=True the intervals are closed on the right: [0, 2], (2, 5], (5, 8], (8, 11].
# The labels are therefore approximate names for continuous ranges -- e.g. a score of
# 5.1 falls in the band labelled '6-8'.
# include_lowest=True closes the first interval on the left so that a score of exactly 0
# is binned as '0-2' instead of becoming NaN (NaN keys are dropped by groupby, so such
# rows would silently vanish from every later breakdown by 'Risk Score Bin').
bins = [0, 2, 5, 8, 11]
labels = ['0-2', '3-5', '6-8', '9-11']
combined_df['Risk Score Bin'] = pd.cut(
    combined_df['Risk Score'],
    bins=bins,
    labels=labels,
    right=True,
    include_lowest=True,
)
combined_df.head()

Unnamed: 0,Race,Age,Gender,Education Level,Employment Status,Income Level,Prior Convictions,Risk Score,Judge Decision,Re-offense,County,Risk Score Bin
0,Black,62,Male,Bachelor's Degree,Employed,34165.969893,5,1.949816,1,0,Claiborne,0-2
1,Black,48,Male,Bachelor's Degree,Employed,73102.340993,3,7.631,0,0,Claiborne,6-8
2,Black,44,Male,High School,Unemployed,78046.909354,2,6.278393,1,0,Claiborne,6-8
3,Black,63,Female,Bachelor's Degree,Employed,121869.327498,1,5.098658,0,0,Claiborne,6-8
4,Black,53,Female,Bachelor's Degree,Employed,73289.622946,2,3.724815,1,0,Claiborne,3-5


#### Comparison of distributions across the three counties

In [172]:
# Racial make-up of each county, drawn as grouped bars (one bar per county within each race)

# Number of records for every (Race, County) pair
race_counts = (
    combined_df
    .groupby(['Race', 'County'])
    .size()
    .rename('Count')
    .reset_index()
)

# Grouped bar chart; bar heights are the pre-computed counts
fig = px.bar(
    race_counts,
    x='Race',
    y='Count',
    text='Count',  # print the count on each bar
    color='County',
    color_discrete_map={'Claiborne': '#ee836e', 'Warren': '#9fe598', 'Copiah': '#7b85d4'},
    category_orders={"County": ["Claiborne", "Warren", "Copiah"]},
    barmode='group',
    labels={'Race': 'Race', 'Count': 'Count'},
    title='Racial Distribution Across Counties',
    template='plotly_white',
)

fig.update_layout(
    xaxis_title="Race",
    yaxis_title="Count",
)
fig.show()

In [173]:
# Comparison of gender distributions across the three counties

# Count occurrences of each Gender per County.
# Stored under its own name so the race table built for the racial-distribution
# chart (`race_counts`) is not overwritten with gender data.
gender_counts = combined_df.groupby(['Gender', 'County']).size().reset_index(name='Count')

# Grouped bar chart: one bar per county within each gender, labelled with its count
fig = px.bar(
    gender_counts,
    x='Gender',
    y='Count',  # Explicitly set Y to Count
    color='County',
    barmode='group',
    title='Gender Distribution Across Counties',
    labels={'Gender': 'Gender', 'Count': 'Count'},
    category_orders={"County": ["Claiborne", "Warren", "Copiah"]},
    template='plotly_white',
    color_discrete_map={'Claiborne': '#ee836e', 'Warren': '#9fe598', 'Copiah': '#7b85d4'},
    text='Count'
)

fig.update_layout(xaxis_title="Gender", yaxis_title="Count")
fig.show()

In [174]:
# Education-level make-up of each county, drawn as grouped bars

# Number of records for every (Education Level, County) pair
education_counts = (
    combined_df
    .groupby(['Education Level', 'County'])
    .size()
    .rename('Count')
    .reset_index()
)

# Grouped bar chart; bar heights are the pre-computed counts
fig = px.bar(
    education_counts,
    x='Education Level',
    y='Count',
    text='Count',  # print the count on each bar
    color='County',
    color_discrete_map={'Claiborne': '#ee836e', 'Warren': '#9fe598', 'Copiah': '#7b85d4'},
    category_orders={"County": ["Claiborne", "Warren", "Copiah"]},
    barmode='group',
    labels={'Education Level': 'Education Level', 'Count': 'Count'},
    title='Education Level Distribution Across Counties',
    template='plotly_white',
)

fig.update_layout(
    xaxis_title="Education Level",
    yaxis_title="Count",
)
fig.show()



#### Comparison of risk scores across Counties

In [175]:
# Mean risk score within each county (Series indexed by County)
avg_risk_score_county = (
    combined_df
    .groupby('County')['Risk Score']
    .agg('mean')
)
avg_risk_score_county

County
Claiborne    5.750037
Copiah       5.204003
Warren       5.294246
Name: Risk Score, dtype: float64

In [176]:
# Average risk score broken down by race within each county

# One row per (County, Race) pair holding the mean Risk Score
avg_risk_score_race_county = combined_df.groupby(
    ['County', 'Race'], as_index=False
)['Risk Score'].mean()

# Grouped bars: counties on the x-axis, one bar per race
fig = px.bar(
    avg_risk_score_race_county,
    x='County',
    y='Risk Score',
    color='Race',
    color_discrete_sequence=px.colors.qualitative.Set2,
    barmode='group',
    text_auto=True,  # annotate each bar with its value
    labels={'Risk Score': 'Average Risk Score'},
    title='Average Risk Score by Race and County',
    template='plotly_white',
)

fig.update_layout(
    width=900,
    height=400,
    xaxis_title='County',
    yaxis_title='Average Risk Score',
)

fig.show()

In [177]:
# Average risk score broken down by gender within each county

# One row per (County, Gender) pair holding the mean Risk Score
avg_risk_score_gender_county = combined_df.groupby(
    ['County', 'Gender'], as_index=False
)['Risk Score'].mean()

# Grouped bars: counties on the x-axis, one bar per gender
fig = px.bar(
    avg_risk_score_gender_county,
    x='County',
    y='Risk Score',
    color='Gender',
    color_discrete_sequence=px.colors.qualitative.Set3,
    barmode='group',
    text_auto=True,  # annotate each bar with its value
    labels={'Risk Score': 'Average Risk Score'},
    title='Average Risk Score by Gender and County',
    template='plotly_white',
)

fig.update_layout(
    width=900,
    height=400,
    xaxis_title='County',
    yaxis_title='Average Risk Score',
)

fig.show()

In [178]:
# Side-by-side table of the per-race and per-gender average risk scores for each county.
#
# NOTE(review): the two inputs are joined on 'County' only, and each has several rows per
# county, so this is a many-to-many join: every race row is paired with every gender row
# of the same county. The two average columns are independent of each other within a row;
# a row does NOT describe the race x gender subgroup. Confirm this layout is intended.
summary_table = (
    avg_risk_score_race_county
    .merge(
        avg_risk_score_gender_county,
        on='County',
        suffixes=('_Race', '_Gender'),  # disambiguate the two 'Risk Score' columns
    )
    .rename(
        columns={
            'Risk Score_Race': 'Avg Risk Score by Race',
            'Risk Score_Gender': 'Avg Risk Score by Gender',
        }
    )
)

# Show the result
summary_table

Unnamed: 0,County,Race,Avg Risk Score by Race,Gender,Avg Risk Score by Gender
0,Claiborne,Black,5.910654,Female,5.762968
1,Claiborne,Black,5.910654,Male,5.736845
2,Claiborne,Other,4.948736,Female,5.762968
3,Claiborne,Other,4.948736,Male,5.736845
4,Claiborne,White,4.5043,Female,5.762968
5,Claiborne,White,4.5043,Male,5.736845
6,Copiah,Black,5.846286,Female,5.175465
7,Copiah,Black,5.846286,Male,5.231423
8,Copiah,Other,4.53689,Female,5.175465
9,Copiah,Other,4.53689,Male,5.231423


In [181]:
# Mean risk score for every (County, Gender, Race) subgroup, with the value column
# named 'Avg Risk Score'
summary_table_gender_race = (
    combined_df
    .groupby(['County', 'Gender', 'Race'])['Risk Score']
    .mean()
    .rename('Avg Risk Score')
    .reset_index()
)

# Show the result
summary_table_gender_race

Unnamed: 0,County,Gender,Race,Avg Risk Score
0,Claiborne,Female,Black,5.952361
1,Claiborne,Female,Other,4.859547
2,Claiborne,Female,White,4.363324
3,Claiborne,Male,Black,5.868757
4,Claiborne,Male,Other,5.216301
5,Claiborne,Male,White,4.650697
6,Copiah,Female,Black,5.733967
7,Copiah,Female,Other,5.194471
8,Copiah,Female,White,4.81316
9,Copiah,Male,Black,5.954553


In [184]:
# Average risk score per county, split by gender (bar colour) with one panel per race

fig = px.bar(
    summary_table_gender_race,
    x='County',
    y='Avg Risk Score',
    facet_col='Race',  # one sub-plot per race
    color='Gender',
    color_discrete_sequence=px.colors.qualitative.Set3,
    barmode='group',
    text_auto=True,  # annotate each bar with its value
    labels={'Avg Risk Score': 'Average Risk Score'},
    title='Average Risk Score by County, Gender, and Race',
    template='plotly_white',
)

fig.update_layout(
    width=1200,
    height=500,
    xaxis_title='County',
    yaxis_title='Average Risk Score',
)

fig.show()

#### Comparative Analysis of Judges’ Bail Decisions to AI Risk Scores

In [179]:
# Cross-tabulate Risk Score Bin and Judge Decision segmented by County.
# 'Risk Score Bin' is categorical, so the groupby result depends on `observed`.
# It is pinned to False (the historical default) so every bin/decision combination is
# kept, with a count of 0 where no rows exist. Relying on the implicit default raises a
# FutureWarning on recent pandas and would change the result once the default flips.
cross_tab_county = (
    combined_df
    .groupby(['County', 'Risk Score Bin', 'Judge Decision'], observed=False)
    .size()
    .reset_index(name='Count')
)

# Cast to str so plotly treats the decision as a discrete category and the
# '0' / '1' keys of color_discrete_map match.
cross_tab_county['Judge Decision'] = cross_tab_county['Judge Decision'].astype(str)

# Stacked bar chart with one facet per county
fig = px.bar(
    cross_tab_county,
    x="Risk Score Bin",
    y="Count",
    color="Judge Decision",  # Stack by judge decision
    facet_col="County",  # Separate plots for each county group
    title="Cross-tabulate Risk Scores and Judges' Decisions by County",
    labels={'Risk Score Bin': 'Risk Score Range', 'Count': 'Frequency'},
    text_auto=True,
    template='plotly_white',
    color_discrete_map={'0': '#ee836e', '1': '#9fe598'}
)

# Give each facet its own Y-scale and show tick labels on every facet
fig.update_yaxes(matches=None)
fig.for_each_yaxis(lambda yaxis: yaxis.update(showticklabels=True))

fig.show()





In [180]:
# Re-offense counts per county, shown in one panel per judge decision

# Number of records for every (County, Re-offense, Judge Decision) combination.
# Both flag columns are cast to str so plotly treats them as discrete categories
# and the '0' / '1' keys of color_discrete_map match.
cross_tab_county_decision = (
    combined_df
    .groupby(['County', 'Re-offense', 'Judge Decision'])
    .size()
    .reset_index(name='Count')
    .astype({'Re-offense': str, 'Judge Decision': str})
)

fig = px.bar(
    cross_tab_county_decision,
    x='County',
    y='Count',
    facet_col='Judge Decision',  # one sub-plot per decision value
    color='Re-offense',
    color_discrete_map={'0': '#ee836e', '1': '#9fe598'},
    barmode='group',
    text_auto=True,  # annotate each bar with its value
    title='Re-offense Rates by County and Judge Decision',
    labels={'Count': 'Frequency', 'Re-offense': 'Re-offense Status'},
)

fig.update_layout(
    template='plotly_white',
    width=1200,
    height=400,
    xaxis_title='County',
    yaxis_title='Frequency',
)

fig.show()

### Demographic Parity

In [185]:
# Per-race decision rate within each county, used to compare racial groups.
# The value is the mean of 'Judge Decision' for each (County, Race) pair, i.e. the share
# of rows whose decision equals 1 if the column is coded 0/1 as the previews suggest.
# NOTE(review): what a decision of 1 means is not shown in this notebook -- confirm it.
demographic_parity = (
    combined_df
    .groupby(['County', 'Race'])['Judge Decision']
    .mean()
    .rename('Demographic Parity')
    .reset_index()
)

# Show the table
demographic_parity

Unnamed: 0,County,Race,Demographic Parity
0,Claiborne,Black,0.326531
1,Claiborne,Other,0.583333
2,Claiborne,White,0.707547
3,Copiah,Black,0.335958
4,Copiah,Other,0.708333
5,Copiah,White,0.715966
6,Warren,Black,0.336207
7,Warren,Other,0.65
8,Warren,White,0.709677


In [186]:
# Visualize Demographic Parity Across Racial Groups
# Grouped bars: races on the x-axis, one bar per county.
fig = px.bar(
    demographic_parity,
    x='Race',
    y='Demographic Parity',
    color='County',
    barmode='group',
    title='Demographic Parity Across Racial Groups by County',
    labels={'Demographic Parity': 'Demographic Parity'},
    # Same county order as the other county-coloured charts in this notebook, so the
    # legend and bar order stay consistent from figure to figure.
    category_orders={"County": ["Claiborne", "Warren", "Copiah"]},
    template='plotly_white',
    color_discrete_map={'Claiborne': '#ee836e', 'Warren': '#9fe598', 'Copiah': '#7b85d4'},
    text='Demographic Parity'
)

# Round the on-bar labels to three decimals; the raw means otherwise print at full
# float precision and overflow the bars.
fig.update_traces(texttemplate='%{text:.3f}')

fig.update_layout(
    xaxis_title='Race',
    yaxis_title='Demographic Parity',
    width=900,
    height=400
)

fig.show()