In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, log_loss
from sklearn.metrics import mutual_info_score
import tensorflow as tf

Preprocess the data by encoding categorical variables and removing null values.

In [2]:
df = pd.read_csv('https://raw.githubusercontent.com/propublica/compas-analysis/master/compas-scores-two-years.csv')
df['score_text'] = df['score_text'].replace({'High': 2, 'Medium': 1, 'Low': 0})
df['score_text'] = df['score_text'].fillna(0)
df['v_score_text'] = df['v_score_text'].replace({'High': 2, 'Medium': 1, 'Low': 0})
df['v_score_text'] = df['v_score_text'].fillna(0)
df = df[df['race'].isin(['Caucasian', 'African-American'])]
df['race'] = df['race'].replace({'Caucasian': 1, 'African-American': 0})

Make a Logistic Regression model that predicts two_year_recidivism without any prejudice remover. Notice that without any prejudice removal, the model predicts recidivism more accurately for Caucasians compared to African Americans.

In [3]:
X = df[['id', 'age', 'juv_fel_count', 'juv_misd_count', 'is_recid', 'decile_score','juv_other_count', 'priors_count', 'v_score_text', 'is_violent_recid', 'race']].copy()
X = X.fillna(0)
Y = df['two_year_recid'].copy()
unfair_model = LogisticRegression(max_iter=1000)
unfair_model.fit(X, Y)

priv = X[X['race'] == 1]
unpriv = X[X['race'] == 0]

priv_pred = unfair_model.predict(priv)
unpriv_pred = unfair_model.predict(unpriv)

accuracy_priv = accuracy_score(Y[X['race'] == 1], priv_pred)
accuracy_unpriv = accuracy_score(Y[X['race'] == 0], unpriv_pred)

print("Accuracy for privileged group (race == 1):", accuracy_priv)
print("Accuracy for unprivileged group (race == 0):", accuracy_unpriv)

Accuracy for privileged group (race == 1): 0.9759576202118989
Accuracy for unprivileged group (race == 0): 0.963474025974026


Without the is_recid feature, the accuracies from the Logistric Regression model for the privileged and unprivileged groups both decrease significantly, while the difference between the two accuracies remains almost the same.

In [4]:
X = df[['id', 'age', 'juv_fel_count', 'juv_misd_count', 'decile_score','juv_other_count', 'priors_count', 'v_score_text', 'is_violent_recid', 'race']].copy()
X = X.fillna(0)
Y = df['two_year_recid'].copy()
unfair_model = LogisticRegression(max_iter=1000)
unfair_model.fit(X, Y)
priv = X[X['race'] == 1]
unpriv = X[X['race'] == 0]

priv_pred = unfair_model.predict(priv)
unpriv_pred = unfair_model.predict(unpriv)

accuracy_priv = accuracy_score(Y[X['race'] == 1], priv_pred)
accuracy_unpriv = accuracy_score(Y[X['race'] == 0], unpriv_pred)

print("Accuracy for privileged group (race == 1):", accuracy_priv)
print("Accuracy for unprivileged group (race == 0):", accuracy_unpriv)

Accuracy for privileged group (race == 1): 0.7224938875305623
Accuracy for unprivileged group (race == 0): 0.7126623376623377



Store the class Y, the non-sensitive features X, and the sensitive feature S separately.

In [5]:
non_sensitive_features = ['id', 'age', 'juv_fel_count', 'is_recid','juv_other_count', 'priors_count', 'v_score_text', 'is_violent_recid']
X = df[non_sensitive_features].copy()
Y = df['two_year_recid'].copy()
S = df['race'].copy()
print(X)
print(Y)
print(S)

         id  age  juv_fel_count  is_recid  juv_other_count  priors_count  \
1         3   34              0         1                0             0   
2         4   24              0         1                1             4   
3         5   23              0         0                0             1   
6         8   41              0         1                0            14   
8        10   39              0         0                0             0   
...     ...  ...            ...       ...              ...           ...   
7207  10994   30              0         1                0             0   
7208  10995   20              0         0                0             0   
7209  10996   23              0         0                0             0   
7210  10997   23              0         0                0             0   
7212  11000   33              0         0                0             3   

      v_score_text  is_violent_recid  
1                0                 1  
2        

In [6]:
def PRLOSS(unpriv, priv, learning_rate):
    unpriv_float = tf.cast(unpriv, dtype=tf.float32)
    priv_float = tf.cast(priv, dtype=tf.float32)

    n_unpriv = tf.cast(tf.shape(unpriv_float)[0], dtype=tf.float32)
    n_priv = tf.cast(tf.shape(priv_float)[0], dtype=tf.float32)

    n_unpriv = tf.maximum(n_unpriv, 1.0)
    n_priv = tf.maximum(n_priv, 1.0)

    Dxisi = tf.stack([n_priv, n_unpriv], axis=0)

    y_pred_priv = tf.reduce_sum(priv_float)
    y_pred_unpriv = tf.reduce_sum(unpriv_float)

    P_ys_stacked = tf.stack([y_pred_priv, y_pred_unpriv], axis=0)
    P_ys = P_ys_stacked / Dxisi

    P = tf.concat([unpriv_float, priv_float], axis=0)

    P_sum = tf.reduce_sum(P)
    total_samples = tf.cast(tf.size(unpriv_float) + tf.size(priv_float), dtype=tf.float32)
    P_y = P_sum / total_samples

    P_y = tf.maximum(P_y, 1e-12)

    log_P_ys_1 = tf.math.log(P_ys[1])
    log_P_y = tf.math.log(P_y)
    P_s1y1 = log_P_ys_1 - log_P_y

    log_1_minus_P_ys_1 = tf.math.log(1 - P_ys[1])
    log_1_minus_P_y = tf.math.log(1 - P_y)
    P_s1y0 = log_1_minus_P_ys_1 - log_1_minus_P_y

    log_P_ys_0 = tf.math.log(P_ys[0])
    log_P_y = tf.math.log(P_y)
    P_s0y1 = log_P_ys_0 - log_P_y

    log_1_minus_P_ys_0 = tf.math.log(1 - P_ys[0])
    log_1_minus_P_y = tf.math.log(1 - P_y)
    P_s0y0 = log_1_minus_P_ys_0 - log_1_minus_P_y

    P_s1y1 = tf.reshape(P_s1y1, [-1])
    P_s1y0 = tf.reshape(P_s1y0, [-1])
    P_s0y1 = tf.reshape(P_s0y1, [-1])
    P_s0y0 = tf.reshape(P_s0y0, [-1])

    PI_s1y1 = unpriv_float * P_s1y1
    PI_s1y0 = (1 - unpriv_float) * P_s1y0
    PI_s0y1 = priv_float * P_s0y1
    PI_s0y0 = (1 - priv_float) * P_s0y0

    PI = tf.reduce_sum(PI_s1y1) + tf.reduce_sum(PI_s1y0) + tf.reduce_sum(PI_s0y1) + tf.reduce_sum(PI_s0y0)

    return learning_rate * PI


X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size=0.1, random_state=42)

# In Rishabh's code the loss and val_loss were NaN during training,
# so I checked for NaN or infinite values in the data (to ensure data integrity)
print("NaN values in X_train:", np.any(np.isnan(X_train)))
print("NaN values in X_test:", np.any(np.isnan(X_test)))
print("NaN values in Y_train:", np.any(np.isnan(Y_train)))
print("NaN values in Y_test:", np.any(np.isnan(Y_test)))

# Normalize the input features -> zero mean and unit variance
X_train_normalized = (X_train - X_train.mean(axis=0)) / X_train.std(axis=0)
X_test_normalized = (X_test - X_test.mean(axis=0)) / X_test.std(axis=0)


def prediction_model(input_shape):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(1, activation='sigmoid', input_shape=(input_shape,))
    ])
    return model

# Compile the model with the custom loss function
model = prediction_model(X_train.shape[1])
model.compile(optimizer='adam', loss=lambda y_true, y_pred: PRLOSS(y_true, y_pred, learning_rate=0.1), metrics = ['accuracy'])

# Train the model with normalized data
model.fit(X_train_normalized, Y_train, epochs=10, batch_size=32, validation_data=(X_test_normalized[:50], Y_test[:50]))

NaN values in X_train: False
NaN values in X_test: False
NaN values in Y_train: False
NaN values in Y_test: False
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7a7f16607d60>

In [9]:
X_test_subset = X_test[non_sensitive_features].copy()
X_test_subset_normalized = (X_test_subset - X_train[non_sensitive_features].mean(axis=0)) / X_train[non_sensitive_features].std(axis=0)

fair_predictions = model.predict(X_test_subset_normalized)
fair_predictions = (fair_predictions > 0.5).astype(int)

# Accuracy of fair model
fair_accuracy = accuracy_score(Y_test, fair_predictions)

print("Accuracy of fair model:", fair_accuracy)


Accuracy of fair model: 0.6813008130081301


In [16]:
print(X_test.columns)


Index(['id', 'age', 'juv_fel_count', 'is_recid', 'juv_other_count',
       'priors_count', 'v_score_text', 'is_violent_recid'],
      dtype='object')


In [18]:
placeholder_values = pd.DataFrame(np.zeros((len(X_test), 3)), columns=['decile_score', 'juv_misd_count', 'race'])

X_test_subset = pd.concat([X_test[['id', 'age', 'juv_fel_count', 'is_recid', 'juv_other_count', 'priors_count', 'v_score_text', 'is_violent_recid']], placeholder_values], axis=1)
X_test_subset_normalized = (X_test_subset - X_train[['id', 'age', 'juv_fel_count', 'is_recid', 'juv_other_count', 'priors_count', 'v_score_text', 'is_violent_recid', 'decile_score', 'juv_misd_count', 'race']].mean(axis=0)) / X_train[['id', 'age', 'juv_fel_count', 'is_recid', 'juv_other_count', 'priors_count', 'v_score_text', 'is_violent_recid', 'decile_score', 'juv_misd_count', 'race']].std(axis=0)

# Predictions using the unfair model (Logistic Regression)
unfair_predictions = unfair_model.predict(X_test_subset_normalized)

# Accuracy of unfair model
unfair_accuracy = accuracy_score(Y_test, unfair_predictions)

print("Accuracy of unfair model:", unfair_accuracy)


KeyError: "['decile_score', 'juv_misd_count', 'race'] not in index"

In [15]:
X_test_subset = X_test[['id', 'age', 'juv_fel_count', 'juv_misd_count', 'is_recid', 'decile_score', 'juv_other_count', 'priors_count', 'v_score_text', 'is_violent_recid', 'race']].copy()

X_test_subset_normalized = (X_test_subset - X_train[['id', 'age', 'juv_fel_count', 'juv_misd_count', 'is_recid', 'decile_score', 'juv_other_count', 'priors_count', 'v_score_text', 'is_violent_recid', 'race']].mean(axis=0)) / X_train[['id', 'age', 'juv_fel_count', 'juv_misd_count', 'is_recid', 'decile_score', 'juv_other_count', 'priors_count', 'v_score_text', 'is_violent_recid', 'race']].std(axis=0)

# Predictions using the unfair model (Logistic Regression)
unfair_predictions = unfair_model.predict(X_test_subset_normalized)

# Accuracy of unfair model
unfair_accuracy = accuracy_score(Y_test, unfair_predictions)

print("Accuracy of unfair model:", unfair_accuracy)


KeyError: "['juv_misd_count', 'decile_score', 'race'] not in index"

In [14]:
X_test_subset = X_test[['id', 'age', 'juv_fel_count', 'is_recid', 'juv_other_count', 'priors_count', 'v_score_text', 'is_violent_recid']].copy()

X_test_subset_normalized = (X_test_subset - X_train[['id', 'age', 'juv_fel_count', 'is_recid', 'juv_other_count', 'priors_count', 'v_score_text', 'is_violent_recid']].mean(axis=0)) / X_train[['id', 'age', 'juv_fel_count', 'is_recid', 'juv_other_count', 'priors_count', 'v_score_text', 'is_violent_recid']].std(axis=0)

unfair_predictions = unfair_model.predict(X_test_subset_normalized)
#unfair_predictions = unfair_model.predict(X_test)

unfair_accuracy = accuracy_score(Y_test, unfair_predictions)
print("Accuracy of unfair model:", unfair_accuracy)


ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- is_recid
Feature names seen at fit time, yet now missing:
- decile_score
- juv_misd_count
- race


In [7]:

####Rishabh####

evaluation = model.evaluate(X_test_normalized, Y_test)
print(evaluation)
privX_train, privX_test, privY_train, privY_test = train_test_split(priv,priv_pred, test_size=0.5, random_state=42)
unprivX_train, unprivX_test, unprivY_train, unprivY_test = train_test_split(unpriv,unpriv_pred, test_size=0.5, random_state=42)
privX_test_normalized = (privX_test - privX_test.mean(axis=0)) / privX_test.std(axis=0)
unprivX_test_normalized = (unprivX_test - unprivX_test.mean(axis=0)) / unprivX_test.std(axis=0)
# print("NaN values in Y_train:", np.any(np.isnan(privX_test)))
# print("NaN values in Y_test:", np.any(np.isnan(privY_test)))
priv_loss, priv_accuracy = model.evaluate(privX_test, privY_test)
print("Privileged Data Loss:", priv_loss)
print("Privileged Data Accuracy:", priv_accuracy)

unpriv_loss, unpriv_accuracy = model.evaluate(unprivX_test, unprivY_test)
print("Unprivileged Data Loss:", unpriv_loss)
print("Unprivileged Data Accuracy:", unpriv_accuracy)

[0.018549500033259392, 0.6813008189201355]


ValueError: in user code:

    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 2066, in test_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 2049, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 2037, in run_step  **
        outputs = model.test_step(data)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/training.py", line 1917, in test_step
        y_pred = self(x, training=False)
    File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.10/dist-packages/keras/src/engine/input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 8), found shape=(None, 10)
