In [1]:
import pandas as pd
import numpy as np
import time
import random
import fairlearn

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# from sklearn.neighbors import DistanceMetric
from sklearn_extra.cluster import KMedoids
import matplotlib.pyplot as plt
import statistics
import pyclustering
from pyclustering.cluster.kmedoids import kmedoids
import gower
import seaborn as sns
import pyclustering
from sklearn.metrics.pairwise import pairwise_distances
from pyclustering.cluster import cluster_visualizer
from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
from pyclustering.cluster.kmedoids import kmedoids

from fairlearn.metrics import demographic_parity_difference
from sklearn.model_selection import KFold
import xgboost as xgb
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error
from sklearn import svm

from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix as sklearn_confusion_matrix
from fairlearn.metrics import demographic_parity_difference, equalized_odds_difference

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import json

In [2]:
def get_cluster(X, cluster_instances):
    cluster = X.loc[X.index == 0]
    for value in cluster_instances:
        tmp = X.loc[X.index == value]
        cluster = pd.concat([cluster, tmp])
    return cluster


def confusion_matrix(X,Y, description, label):   
    """
    Get all values for quality measure
    ---------------------------------
    n = # instances covered by the subgroup description
    N = # instances of dataset
    
    tp = # instances covered by subgroup description and subgroup target
    fp = # instances covered by subgroup description but not subgroup target
    tn = # instances not covered by subgroup description and not subgroup target
    fn = # instances not covered by subgroup description but subgroup target
    
    TP = # instances covered by the subgroup target
    FP = # instances not covered by the subgroup target
    """
    
    # Create a mask to obtain all instances covered by description
    mask = pd.Series([True] * len(X))
    for feature, value in description.items():
        mask &= (X[feature] == value)
        
    n = len(X[mask]) 
    N = len(X) 
    
    tp = len(Y[mask].loc[Y[mask]['label'] == label])
    fp = len(Y[mask].loc[Y[mask]['label'] == 1-label])
    
    TP = len(Y.loc[Y['label'] == label]) 
    FP = len(Y.loc[Y['label'] == 1-label])
    
    tn = N - n - fp
    fn = TP - tp
    

    
    return tp, fp, tn, fn, TP, FP

# The following are from the VLSD article of Lopez-Martinez-Carrasco
def sensitivity(tp, TP):
    return tp/TP


def specificity(fp, FP):
    return (FP-fp)/FP

def piatetsky_shapiro(tp, fp, TP, FP):
    left = tp + fp
    if left == 0:
        return 0
    
    middle = tp/(tp+fp)
    right = TP/(TP+FP)
    return left * (middle-right)
  

def WRAcc(tp, fp, TP, FP):
    if tp + fp == 0:
        return 0
    left = (tp+fp)/(TP+FP)
    middle = tp/(tp+fp)
    right = TP/(TP+FP)
    return left * (middle-right)

def WRAcc_optimistic(tp, fp, TP, FP):
    if tp + fp == 0:
        first = 0
    else:
        first = tp**2/(tp+fp)
        
    if TP + FP == 0:
        second = 0
    else:
        second = TP/(TP+FP)
        
    return first*(1-second)

# Train ML Models

In [3]:
df = pd.read_excel("../german_credit_data.xlsx")
df['label'] = (df.Risk == 'good').astype(int)
df.drop(['Unnamed: 0','Risk'],axis=1,inplace=True)
df = df.replace(np.nan, 'unknown', regex=True)
df['Age'] = df['Age'].apply(lambda x: '>=25.5' if x >= 25.5 else '<25.5')
df['Credit amount'] = df['Credit amount'].apply(lambda x: '>=3913.5' if x >= 3913.5 else '<3913.5')
df['Duration'] = df['Duration'].apply(lambda x: '>=15.5' if x >= 15.5 else '<15.5')
df = df.replace(np.nan, 'unknown', regex=True)
changes = []
changes_dict = {}
data = df.loc[: , df.columns != 'label']
labels = df.loc[: , df.columns == 'label']


#Create lists of categorical and numerical features
all_features = ['Age','Credit amount','Duration','Job','Sex', 'Housing','Saving accounts',
                'Checking account','Purpose']
num_features = []
cat_features = all_features
df.head()

le = preprocessing.LabelEncoder()
for column in cat_features:
    le.fit(data[column])
    encoded_values = le.transform(data[column])
    unique_labels = data[column].unique()
    column_changes = {}
    
    for label, encoded_value in zip(unique_labels, encoded_values):
        new_key = f"{column}_{label}"
        # Check if the encoded value already exists for this column
        while encoded_value in column_changes.values():
            encoded_value += 1  # Increment until unique
        changes.append(f"{column}: {label} changed to {encoded_value}")
        column_changes[new_key] = encoded_value
    
    # Update the changes_dict with changes for the current column
    changes_dict.update(column_changes)
    data.loc[:, column] = encoded_values

# print("List of changes:")
for change in changes:
    print(change)
    
# 70% training and 30% test
data = data.astype('int64')
X_train, X_test, y_train, y_test = train_test_split(data, np.ravel(labels), test_size=0.30, random_state=78) 

Age: >=25.5 changed to 1
Age: <25.5 changed to 0
Credit amount: <3913.5 changed to 0
Credit amount: >=3913.5 changed to 1
Duration: <15.5 changed to 0
Duration: >=15.5 changed to 1
Job: 2 changed to 2
Job: 1 changed to 3
Job: 3 changed to 1
Job: 0 changed to 4
Sex: male changed to 1
Sex: female changed to 0
Housing: own changed to 1
Housing: free changed to 2
Housing: rent changed to 3
Saving accounts: unknown changed to 4
Saving accounts: little changed to 0
Saving accounts: quite rich changed to 1
Saving accounts: rich changed to 2
Saving accounts: moderate changed to 3
Checking account: little changed to 0
Checking account: moderate changed to 1
Checking account: unknown changed to 3
Checking account: rich changed to 2
Purpose: radio/TV changed to 5
Purpose: education changed to 6
Purpose: furniture/equipment changed to 3
Purpose: car changed to 4
Purpose: business changed to 1
Purpose: domestic appliances changed to 7
Purpose: repairs changed to 8
Purpose: vacation/others changed t

### Logistic Regression

In [4]:
# train logistic regression model and check performance
X_train, X_test, y_train, y_test = train_test_split(data, np.ravel(labels), test_size=0.30, random_state=78)
clf = LogisticRegression(random_state=100,max_iter=10000).fit(X_train, y_train)
clf_predictions_train = clf.predict(X_train)
clf_predictions_test = clf.predict(X_test)
clf_predictions_total = clf.predict(data)

p_correct_train = 1 - (abs(clf_predictions_train-y_train).sum() / len(y_train))
p_correct_test = 1 - (abs(clf_predictions_test-y_test).sum() / len(y_test))
p_correct_total = 1 - (abs(clf_predictions_total-np.ravel(labels)).sum() / len(np.ravel(labels)))

print('correct predictions train set' ,'\t', p_correct_train*100)
print('correct predictions test set' , '\t',  p_correct_test*100)
print('correct predictions total set' , '\t', p_correct_total*100)

correct predictions train set 	 75.14285714285714
correct predictions test set 	 72.0
correct predictions total set 	 74.2















# Bias

In [5]:
#input: array with predicted labels, array with true labels
#output: percentage correct predicted labels 

def wrong_disadvantage(predicted_labels,true_labels):
    count = 0 
    size = len(true_labels)
    for i in np.arange(size):
        if ((predicted_labels[i]==0) & (true_labels[i]==1)):
            count += 1
    output = count / size
    return output    


# Get Fairness based on description

In [6]:
def get_ground_truths(clf, data, label, description, sensitive_attribute='Sex'):
    # Create a mask to obtain all instances covered by description
    mask = pd.Series([True] * len(data))
    for feature, value in description.items():
        mask &= (data[feature] == value)
    
    data_labels = labels
    data_predicted = clf.predict(data)
    subgroup_labels = data_labels[mask]
    subgroup_predicted = data_predicted[mask]
    
    
    # something is fair when the tp and tn of the subgroup are the same as the tp and tn of the data overall?
    subgroup_true_positives = np.sum((subgroup_predicted == 1) & (subgroup_labels['label'] == 1))
    subgroup_true_negatives = np.sum((subgroup_predicted == 0) & (subgroup_labels['label'] == 0))
    subgroup_false_positives = np.sum((subgroup_predicted == 1) & (subgroup_labels['label'] == 0))
    subgroup_false_negatives = np.sum((subgroup_predicted == 0) & (subgroup_labels['label'] == 1))
    
    true_positives = np.sum((data_predicted == 1) & (data_labels['label'] == 1))
    true_negatives = np.sum((data_predicted == 0) & (data_labels['label'] == 0))
    false_positives = np.sum((data_predicted == 1) & (data_labels['label'] == 0))
    false_negatives = np.sum((data_predicted == 0) & (data_labels['label'] == 1))

    ground_truths_subgroup = [subgroup_true_positives, subgroup_true_negatives, subgroup_false_positives, 
                             subgroup_false_negatives]
    ground_truths_data = [true_positives, true_negatives, false_positives, false_negatives]
    
    return ground_truths_subgroup, ground_truths_data


def positives_subgroup(ground_truths_subgroup, ground_truths_data):
    tp = ground_truths_data[0]
    tn = ground_truths_data[1]
    fp = ground_truths_data[2]
    fn = ground_truths_data[3]
    
    tp_s = ground_truths_subgroup[0]
    tn_s = ground_truths_subgroup[1]
    fp_s = ground_truths_subgroup[2]
    fn_s = ground_truths_subgroup[3]
       
    data_size = tp+fp+tn+fn
    subgroup_size = tp_s+fp_s+tn_s+fn_s
    P_D = (tp+fp)/data_size
    P_D_g = (tp_s+fp_s)/subgroup_size
    
    #based on the assumption that each instance is evenly likely to belong to this subgroup
    alpha_P = subgroup_size/data_size 
#     beta_SP = np.abs(SP_D - SP_D_g) # now we don't know if the bias is positive or negative
    beta_P = P_D - P_D_g
    
    return alpha_P, P_D, P_D_g

def demographic_parity_subgroup(P_D, P_D_g):
    return np.abs(P_D-P_D_g)


def false_positives_subgroup(ground_truths_subgroup, ground_truths_data):  
    tp = ground_truths_data[0]
    tn = ground_truths_data[1]
    fp = ground_truths_data[2]
    fn = ground_truths_data[3]
    
    tp_s = ground_truths_subgroup[0]
    tn_s = ground_truths_subgroup[1]
    fp_s = ground_truths_subgroup[2]
    fn_s = ground_truths_subgroup[3]
       
    data_size = tp+fp+tn+fn
    subgroup_size = tp_s+fp_s+tn_s+fn_s

    FP_D = fp/(fp+tn)
    if fp_s + tn_s == 0:
        FP_D_g = 0
    else:
        FP_D_g = fp_s/(fp_s+tn_s)   
    
    alpha_FP = (fp_s+tn_s)/data_size
#     beta_FP = np.abs(FP_D - FP_D_g) # now we don't know if the bias is positive or negative
    beta_FP = FP_D - FP_D_g
    return alpha_FP, beta_FP

def true_positives_subgroup(ground_truths_subgroup, ground_truths_data):  
    tp = ground_truths_data[0]
    tn = ground_truths_data[1]
    fp = ground_truths_data[2]
    fn = ground_truths_data[3]
    
    tp_s = ground_truths_subgroup[0]
    tn_s = ground_truths_subgroup[1]
    fp_s = ground_truths_subgroup[2]
    fn_s = ground_truths_subgroup[3]
       
    data_size = tp+fp+tn+fn
    subgroup_size = tp_s+fp_s+tn_s+fn_s

    TP_D = tp/(tp+fn)
    if tp_s + fn_s == 0:
        TP_D_g = 0
    else:
        TP_D_g = tp_s/(tp_s+fn_s)   
    
    alpha_TP = (tp_s+fn_s)/data_size
#     beta_FP = np.abs(FP_D - FP_D_g) # now we don't know if the bias is positive or negative
    beta_TP = TP_D - TP_D_g
    return alpha_TP, beta_TP

def equalized_odds_subgroup(TPD, FPD):
    return max(np.abs(TPD), np.abs(FPD))
#     if TPD < 0 and FPD < 0:
#         return min(TPD, FPD)
#     else:
#         return max(np.abs(TPD), np.abs(FPD))
    
    

# gt_sg, gt_d = get_ground_truths(clf, data, labels, {'Checking account': 3})   
# statistical_parity_subgroup(gt_sg, gt_d)
# false_positives_subgroup(gt_sg, gt_d)

In [7]:
def fairness_based_on_descriptions(data, label, description, sensitive_attribute='Sex'):
    bias_clf = {
        'Accuracy': [],
        'Wrong disadvantage': [],
        
        'Recall': [],
        'sensitivity': [],
        'specificity': [],
        'piatetsky_shapiro': [],
        'WRAcc_score': [],
        'WRAcc_score_optimistic': [],
        
        'demographic_parity_difference' : [],
        'equalized_odds_difference' : [],
        'demographic_parity_subgroup': [],
        'equalized_odds_subgroup': [],
        'true_positive_subgroup': [],
        'false_positive_subgroup': [],
        'weighted_dpd_subgroup': [],
        'weighted_eod_subgroup': [],
        'weighted_tp_subgroup': [],
        'weighted_fp_subgroup': []
    }
       
    tp, fp, tn, fn, TP, FP = confusion_matrix(data,label, description, label=1)

    sensitivit = sensitivity(tp, TP)
    specificit = specificity(fp, FP)
    shapiro = piatetsky_shapiro(tp, fp, TP, FP)
    WRAcc_score = WRAcc(tp ,fp, TP, FP)
    WRAcc_score_optimistic = WRAcc_optimistic(tp, fp, TP, FP)
    
    
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    
    
    X_train, X_test, y_train, y_test = train_test_split(data, np.ravel(labels), test_size=0.30, random_state=78)
    for train_index, test_index in kf.split(data):
        X_train, X_test = data.iloc[train_index], data.iloc[test_index]
        y_train, y_test = label.iloc[train_index], label.iloc[test_index]
        clf = LogisticRegression(random_state=100,max_iter=10000)
        clf.fit(X_train, np.ravel(y_train))
        # Create a mask to obtain all instances covered by description
        mask = pd.Series([True] * len(data))
        for feature, value in description.items():
            mask &= (data[feature] == value)

        subset_X = data[mask]
        subset_y = label[mask]

        if len(subset_X) > 0:
            clf_predictions_description = clf.predict(subset_X)
        else:
            return pd.DataFrame(bias_clf)


        acc = accuracy_score(np.ravel(subset_y), clf_predictions_description)

        recall = recall_score(np.ravel(subset_y), clf_predictions_description, pos_label=1, zero_division=0.0)
        score = wrong_disadvantage(clf_predictions_description,np.ravel(subset_y))

        dp = demographic_parity_difference(subset_y, clf_predictions_description, 
                                           sensitive_features=subset_X[sensitive_attribute])
        eo = equalized_odds_difference(subset_y, clf_predictions_description, 
                                       sensitive_features=subset_X[sensitive_attribute])


        gt_sg, gt_d = get_ground_truths(clf, data, label, description) 
        alpha_P, P_D, P_D_g = positives_subgroup(gt_sg, gt_d)
        beta_P = demographic_parity_subgroup(P_D, P_D_g)
        alpha_FP, beta_FP = false_positives_subgroup(gt_sg, gt_d)
        alpha_TP, beta_TP = true_positives_subgroup(gt_sg, gt_d)
        eod = equalized_odds_subgroup(beta_TP, beta_FP)
        weod = equalized_odds_subgroup(alpha_TP*beta_TP, alpha_FP*beta_FP)
    
        bias_clf['Accuracy'].append(acc)
        bias_clf['Wrong disadvantage'].append(score)

        bias_clf['Recall'].append(recall)
        bias_clf['sensitivity'].append(sensitivit)
        bias_clf['specificity'].append(specificit)
        bias_clf['piatetsky_shapiro'].append(shapiro)
        bias_clf['WRAcc_score'].append(WRAcc_score)
        bias_clf['WRAcc_score_optimistic'].append(WRAcc_score_optimistic)

        bias_clf['demographic_parity_difference'].append(dp)
        bias_clf['equalized_odds_difference'].append(eo)
        bias_clf['demographic_parity_subgroup'].append(beta_P)
        bias_clf['equalized_odds_subgroup'].append(eod)
        bias_clf['false_positive_subgroup'].append(beta_FP)
        bias_clf['true_positive_subgroup'].append(beta_TP)
        bias_clf['weighted_dpd_subgroup'].append(alpha_P*beta_P)
        bias_clf['weighted_eod_subgroup'].append(weod)
        bias_clf['weighted_tp_subgroup'].append(alpha_TP*beta_TP)
        bias_clf['weighted_fp_subgroup'].append(alpha_FP*beta_FP)

    
    df_bias = pd.DataFrame(bias_clf)
    df_bias = pd.DataFrame(df_bias.mean()).T
    df_bias['description'] = [description]
    return df_bias


def allow_comparison_clustering(all_descriptions, data, label):
    comparison_clustering = pd.DataFrame(columns=[
        'description',
        'Accuracy',
        'Wrong disadvantage',
        'Recall',
        'sensitivity',
        'specificity',
        'piatetsky_shapiro',
        'WRAcc_score',
        'WRAcc_score_optimistic',
        'demographic_parity_difference',
        'equalized_odds_difference'])

    for description in all_descriptions:
        changed_description = {}
        for key, value in description.items():
            original_key = f"{key}_{value}"
            if original_key in changes_dict:
                changed_description[key] = changes_dict[original_key]
       
        row = fairness_based_on_descriptions(data, label, changed_description, sensitive_attribute='Sex').round(4)

        comparison_clustering = pd.concat([comparison_clustering, row], ignore_index=False)
        
    return comparison_clustering




# Comparison Subgroup Discovery vs Cluster discovery

In [8]:
list_of_changes = [
    {'Purpose': ['repairs', 8]}, 
    {'Purpose': ['vacation/others', 2]}, 
    {'Purpose': ['domestic appliances', 7]}, 
    {'Purpose': ['business', 1]}, 
    {'Purpose': ['car', 4]}, 
    {'Purpose': ['furniture/equipment', 3]}, 
    {'Purpose': ['education', 6]}, 
    {'Purpose': ['radio/TV', 5]}, 
    {'Checking account': ['rich', 2]}, 
    {'Checking account': ['unknown', 3]}, 
    {'Checking account': ['moderate', 1]}, 
    {'Checking account': ['little', 0]}, 
    {'Saving accounts': ['moderate', 3]}, 
    {'Saving accounts': ['rich', 2]}, 
    {'Saving accounts': ['quite rich', 1]}, 
    {'Saving accounts': ['little', 0]}, 
    {'Saving accounts': ['unknown', 4]}, 
    {'Housing': ['rent', 3]}, 
    {'Housing': ['free', 2]}, 
    {'Housing': ['own', 1]}, 
    {'Job': [0, 4]}, 
    {'Job': [3, 1]}, 
    {'Job': [1, 3]}, 
    {'Job': [2, 2]}, 
    {'Sex': ['female', 0]}, 
    {'Sex': ['male', 1]},
    {'Age': ['>=25.5', 1]},
    {'Age': ['<25.5', 0]},
    {'Duration': ['<15.5', 0]},
    {'Duration': ['>=15.5', 1]},
    {'Credit amount': [ '<3913.5', 0]},
    {'Credit amount': [ '>=3913.5', 1]}
]
   
def find_feature_change(list_of_changes, feature, value):
    for change in list_of_changes:
        if feature in change:
            new_value, old_value = change[feature]
            if old_value == value:
                return new_value
    return value

def translate_format_comparison(comparison_clustering):
    list_of_descriptions = list(comparison_clustering['description'])
    list_of_new_descriptions = []
    
    for description in list_of_descriptions:
        new_description = {}
        for feature, value in description.items():
            new_value = find_feature_change(list_of_changes, feature, value)
            new_description[feature] = new_value
        list_of_new_descriptions.append(new_description)

    comparison_clustering['description'] = list_of_new_descriptions
    
    return comparison_clustering

def preprocess_string_to_dict(s):
    try:
        return json.loads(s.replace("'", '"'))
    except json.JSONDecodeError:
        return {}
    

def preprocess_descriptions(df):
    df['description'] = df['description'].apply(preprocess_string_to_dict)
    return list(df['description'])

# PSD

In [12]:
PSD_results = pd.read_csv('../PSD/PSD_goodcredit_results.csv')[:100]    
PSD_results.to_csv('../PSD/PSD_results.csv', index=False)

# DFS

In [10]:
dfs_descriptions = pd.read_csv('../DFS/DFS_descriptions.csv')
dfs_descriptions = preprocess_descriptions(dfs_descriptions)
dfs_results = allow_comparison_clustering(dfs_descriptions, data, labels)
dfs_results = translate_format_comparison(dfs_results)
dfs_results = dfs_results.sort_values(by='WRAcc_score', ascending=False)
dfs_results.to_csv('../DFS/DFS_results.csv', index=False)

# VLSD

In [11]:
vlsd_descriptions = pd.read_csv('../VLSD/VLSD_descriptions.csv')[:100]
vlsd_descriptions = preprocess_descriptions(vlsd_descriptions)
vlsd_results = allow_comparison_clustering(vlsd_descriptions, data, labels)
vlsd_results = translate_format_comparison(vlsd_results)
vlsd_results = vlsd_results.sort_values(by='WRAcc_score', ascending=False)
vlsd_results.to_csv('../VLSD/VLSD_results.csv')