In [63]:
pip install fairlearn



In [64]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from fairlearn.metrics import MetricFrame, true_positive_rate, false_positive_rate

In [65]:
df = pd.read_csv('/content/recidivism_full.csv')

In [66]:
df.columns

Index(['ID', 'Gender', 'Race', 'Age_at_Release', 'Residence_PUMA',
       'Gang_Affiliated', 'Supervision_Risk_Score_First',
       'Supervision_Level_First', 'Education_Level', 'Dependents',
       'Prison_Offense', 'Prison_Years', 'Prior_Arrest_Episodes_Felony',
       'Prior_Arrest_Episodes_Misd', 'Prior_Arrest_Episodes_Violent',
       'Prior_Arrest_Episodes_Property', 'Prior_Arrest_Episodes_Drug',
       'Prior_Arrest_Episodes_PPViolationCharges',
       'Prior_Arrest_Episodes_DVCharges', 'Prior_Arrest_Episodes_GunCharges',
       'Prior_Conviction_Episodes_Felony', 'Prior_Conviction_Episodes_Misd',
       'Prior_Conviction_Episodes_Viol', 'Prior_Conviction_Episodes_Prop',
       'Prior_Conviction_Episodes_Drug',
       'Prior_Conviction_Episodes_PPViolationCharges',
       'Prior_Conviction_Episodes_DomesticViolenceCharges',
       'Prior_Conviction_Episodes_GunCharges', 'Prior_Revocations_Parole',
       'Prior_Revocations_Probation', 'Condition_MH_SA', 'Condition_Cog_Ed',
     

In [67]:
# Keep only the needed columns
df = df[df['Race'].isin(['BLACK', 'WHITE'])]
df = df.dropna(subset=['Age_at_Release', 'Prior_Arrest_Episodes_Felony', 'Prison_Offense', 'Recidivism_Within_3years'])

In [68]:
# One-hot encode the categorical column
df = pd.get_dummies(df, columns=['Prison_Offense'], drop_first=True)

In [78]:
# Replace '10 or more' with 10 and convert to numeric
df['Prior_Arrest_Episodes_Felony'] = df['Prior_Arrest_Episodes_Felony'].replace('10 or more', 10).astype(int)

# One-hot encode 'Age_at_Release'
df = pd.get_dummies(df, columns=['Age_at_Release'], drop_first=True)


# Select all required columns
features = ['Prior_Arrest_Episodes_Felony'] + [col for col in df.columns if 'Prison_Offense' in col or 'Age_at_Release' in col]
X = df[features]
y = df['Recidivism_Within_3years'].astype(int)
race = df['Race']

# Split the data after converting Age_at_Release to numeric
X_train, X_test, y_train, y_test, race_train, race_test = \
    train_test_split(X, y, race, test_size=0.3, stratify=race)

In [70]:
# Replace '10 or more' with 10 and convert to numeric
df['Prior_Arrest_Episodes_Felony'] = df['Prior_Arrest_Episodes_Felony'].replace('10 or more', 10).astype(int)

X_train, X_test, y_train, y_test, race_train, race_test = \
    train_test_split(X, y, race, test_size=0.3, stratify=race)

In [71]:
# One-hot encode the Gender column
df = pd.get_dummies(df, columns=['Gender'], drop_first=True)

In [79]:
# Train a Logistic Regression model
model = LogisticRegression(solver='liblinear')
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Calculate fairness metrics
metric_frame = MetricFrame(metrics={"True positive rate": true_positive_rate,
                                    "False positive rate": false_positive_rate},
                           y_true=y_test,
                           y_pred=y_pred,
                           sensitive_features=race_test)

# Display the fairness metrics
display(metric_frame.by_group)

Unnamed: 0_level_0,True positive rate,False positive rate
Race,Unnamed: 1_level_1,Unnamed: 2_level_1
BLACK,0.838667,0.588235
WHITE,0.783768,0.49921


In [74]:
from sklearn.metrics import accuracy_score

# Calculate the accuracy score
accuracy = accuracy_score(y_test, y_pred)

# Display the accuracy score
display(f"Accuracy Score: {accuracy}")

'Accuracy Score: 0.529403073286052'