### Prepare the dataset

I've made it a little smaller

In [1]:
import numpy as np
import pandas as pd

import fatf.utils.data.datasets as fatf_datasets
import fatf.utils.models as fatf_models

import fatf.fairness.predictions.measures as fatf_pfm

import fatf.transparency.predictions.counterfactuals as fatf_cf

import sys
sys.path.append("../../src")
%load_ext autoreload
%autoreload 2


from project import rf_models, preprocessing


df = pd.read_csv("../../data/initial_custody_2017_gsprs.csv", low_memory=False)


df = df[df["race"] != 'A'] # Drop Asian

#df = df[df["race"] != 'H'] # Drop Hispanic

df = df[df["race"] != 'I'] # Drop Indian

df = df[df["race"] != 'O'] # Drop Other

def label_age (row):
    if row['date_of_birth'] < 19720000 :
        return 2 #old
    elif row['date_of_birth'] < 19960000:
        return 1 #middle
    else: 
        return 0 #young 


df['age_cat'] = df.apply(lambda row: label_age(row), axis=1)





def hi_lo_ic(row):
    if row['ic_custdy_level'] < 4:
        return 0 #low
    else: 
        return 1 #high

df['cl_cat'] = df.apply(lambda row: hi_lo_ic(row), axis=1)



#print(df["ic_custdy_level"])
# preprocessing
data = preprocessing.preprocess_input_vars(df)

data = data.convert_dtypes(infer_objects=True, convert_string=True,
      convert_integer=True, convert_boolean=True, convert_floating=True)

data = data[data["ic_custdy_level"] > 1]

#print(data["race"])
census_names =     [
        "gender_female",
        "age_cat",
        "race",
        "off_1_prs_max",
        "off_1_gs_max",
        "prior_commits",
        "ic_institut_adj",
        "cl_cat"
    ]

data = data[
    [
        "gender_female",
        "age_cat",
        "race",
        "off_1_prs_max",
        "off_1_gs_max",
        "prior_commits",
        "ic_institut_adj",
        "cl_cat"
    ]
]
data = data.dropna()










21-Aug-28 16:27:49 fatf.utils.array.tools INFO     Using numpy's numpy.lib.recfunctions.structured_to_unstructured as fatf.utils.array.tools.structured_to_unstructured and fatf.utils.array.tools.structured_to_unstructured_row.


In [2]:
data['off_1_gs_max'].describe()

count    13722.000000
mean        11.777146
std          3.073932
min          1.000000
25%         10.000000
50%         12.000000
75%         14.000000
max         15.000000
Name: off_1_gs_max, dtype: float64

### Convert data to a dictionary

In [3]:
X = data.drop("cl_cat", axis=1)

xx = X.to_records(index=False)

Y = data["cl_cat"]

dtypes = [('gender_female','i8'),
          ('age_cat','i8'),
          ('race','i8'),
          ('off_1_prs_max','f8'),
          ('off_1_gs_max','i8'),
          ('prior_commits','i8'),
          ('ic_institut_adj','i8')]

ic_data = {}



In [4]:
data_array = np.array(xx, dtype = dtypes)
data_array


array([(0, 2, 0, 4., 15,  1, 3), (0, 2, 0, 4., 14, 18, 2),
       (0, 2, 0, 3., 14,  2, 2), ..., (0, 1, 2, 2.,  9,  3, 5),
       (0, 2, 0, 3.,  5, 11, 2), (0, 1, 2, 3.,  8,  5, 2)],
      dtype=[('gender_female', '<i8'), ('age_cat', '<i8'), ('race', '<i8'), ('off_1_prs_max', '<f8'), ('off_1_gs_max', '<i8'), ('prior_commits', '<i8'), ('ic_institut_adj', '<i8')])

In [5]:
ic_data['data'] = data_array
ic_data['target'] = np.array(Y.values, dtype='i4')
ic_data['target_names'] = ['low','high']
ic_data['feature_names'] = ['gender_female','age_cat','race','off_1_prs_max','off_1_gs_max','prior_commits','ic_instit_adj']

In [6]:
ic_X = ic_data['data']


ic_y = ic_data['target']
ic_feature_names = ic_data['feature_names']
ic_class_names = ic_data['target_names']

In [7]:
# Map target indices to target names
ic_y = np.array([ic_class_names[i] for i in ic_y], dtype='str')


In [8]:
unique_identifiers = []
columns_to_keep = [i for i in ic_X.dtype.names if i not in unique_identifiers]
#
ic_X = ic_X[columns_to_keep]
ic_feature_names = [i for i in ic_feature_names if i not in unique_identifiers]

In [9]:
print(ic_X)

[(0, 2, 0, 4., 15,  1, 3) (0, 2, 0, 4., 14, 18, 2)
 (0, 2, 0, 3., 14,  2, 2) ... (0, 1, 2, 2.,  9,  3, 5)
 (0, 2, 0, 3.,  5, 11, 2) (0, 1, 2, 3.,  8,  5, 2)]


In [10]:
#Train a model
clf = fatf_models.KNN()
clf.fit(ic_X, ic_y)

### Check for counterfactuals

In [11]:
# Select a data point to evaluate its counterfactual fairness
data_point_index = 1124
data_point = ic_X[data_point_index]
data_point_y = ic_y[data_point_index]

# Select a set of protected features
protected_features = ['gender_female', 'age_cat','race']

# Print out the protected features
assert protected_features, 'The protected features list cannot be empty.'
person = ' is' if len(protected_features) == 1 else 's are'
print('The following fautre{} considered protected:'.format(person))
for feature_name in protected_features:
    print('    * "{}".'.format(feature_name))

# Print the instance
print('\nEvaluating counterfactual fairness of a data point (index {}) of '
      'class *{}* with the following features:'.format(data_point_index,
                                                       data_point_y))
for feature_name in data_point.dtype.names:
    print('    * The feature *{}* has value: {}.'.format(
        feature_name, data_point[feature_name]))

The following fautres are considered protected:
    * "gender_female".
    * "age_cat".
    * "race".

Evaluating counterfactual fairness of a data point (index 1124) of class *high* with the following features:
    * The feature *gender_female* has value: 0.
    * The feature *age_cat* has value: 1.
    * The feature *race* has value: 0.
    * The feature *off_1_prs_max* has value: 2.0.
    * The feature *off_1_gs_max* has value: 12.
    * The feature *prior_commits* has value: 1.
    * The feature *ic_institut_adj* has value: 4.


In [13]:
data_point_index = 1
data_point = ic_X[data_point_index]
data_point_y = ic_y[data_point_index]
cfs, cfs_distances, cfs_classes = fatf_pfm.counterfactual_fairness(
    instance=data_point,
    protected_feature_indices=protected_features,
    model=clf,
    default_numerical_step_size=.5,
    dataset=ic_X)
print(cfs)

[(0. , 1.5, 1. , 4., 14, 18, 2) (0. , 2. , 1.5, 4., 14, 18, 2)
 (0.5, 2. , 1. , 4., 14, 18, 2)]


In [24]:
ct = 0
for ii in range(1000):
    print("=====================================\nii =", ii)
    data_point_index = ii
    data_point = ic_X[data_point_index]
    data_point_y = ic_y[data_point_index]
    cfs, cfs_distances, cfs_classes = fatf_pfm.counterfactual_fairness(
        instance=data_point,
        protected_feature_indices=protected_features,
        model=clf,
        default_numerical_step_size=1,
        dataset=ic_X)
    #print(cfs)
    if len(cfs) > 0:
        ct +=1
        cfs_text = fatf_cf.textualise_counterfactuals(
            data_point,
            cfs,
            instance_class=data_point_y,
            counterfactuals_distances=cfs_distances,
            counterfactuals_predictions=cfs_classes)
        print('\n{}'.format(cfs_text))    
print(ct)

ii = 0
ii = 1

Instance (of class *high*):
(0, 2, 0, 4., 14, 18, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *low*):
Distance: 2.0
    feature *age_cat*: *2* -> *1*
    feature *race*: *0* -> *1*
ii = 2

Instance (of class *low*):
(0, 2, 0, 3., 14, 2, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 2.0
    feature *age_cat*: *2* -> *0*
ii = 3
ii = 4

Instance (of class *low*):
(0, 2, 2, 4., 14, 2, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*

Counterfactual instance (of class *high*):
Distance: 3.0
    feature *age_cat*: *2* -> *0*
    feature *race*: *2* -> *1*

Counterfact


Instance (of class *low*):
(0, 2, 2, 3., 14, 6, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 4.0
    feature *age_cat*: *2* -> *0*
    feature *race*: *2* -> *0*
ii = 36
ii = 37

Instance (of class *low*):
(0, 2, 2, 3., 14, 4, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 2.0
    feature *race*: *2* -> *0*

Counterfactual instance (of class *high*):
Distance: 3.0
    feature *age_cat*: *2* -> *0*
    feature *race*: *2* -> *1*
ii = 38

Instance (of class *high*):
(0, 2, 2, 4., 9, 3, 3)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*

Counterfactual instance (o

ii = 73
ii = 74
ii = 75
ii = 76

Instance (of class *high*):
(0, 2, 0, 4., 14, 5, 3)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *low*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*
ii = 77
ii = 78

Instance (of class *low*):
(0, 2, 0, 4., 12, 2, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*
ii = 79
ii = 80
ii = 81
ii = 82

Instance (of class *low*):
(0, 2, 0, 4., 12, 1, 3)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*
ii = 83
ii = 84

Instance (of class *high*):
(0, 2, 0, 4., 12, 2, 2)

Feature names: ('gender_female', 'age_cat', 'race', 

ii = 114

Instance (of class *high*):
(0, 2, 1, 1., 12, 10, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *low*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*

Counterfactual instance (of class *low*):
Distance: 1.0
    feature *race*: *1* -> *0*
ii = 115
ii = 116

Instance (of class *high*):
(0, 2, 0, 4., 14, 2, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*
ii = 117
ii = 118

Instance (of class *low*):
(0, 2, 2, 0.5, 15, 0, 1)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*

Counterfactual instance (of class *high*):
Distance: 2.0
    feat

ii = 183
ii = 184
ii = 185
ii = 186
ii = 187
ii = 188
ii = 189
ii = 190
ii = 191
ii = 192
ii = 193
ii = 194
ii = 195
ii = 196
ii = 197

Instance (of class *high*):
(0, 2, 2, 3., 15, 0, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *low*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*

Counterfactual instance (of class *low*):
Distance: 1.0
    feature *race*: *2* -> *1*
ii = 198
ii = 199
ii = 200
ii = 201
ii = 202

Instance (of class *low*):
(0, 2, 0, 2., 4, 1, 3)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*
ii = 203
ii = 204
ii = 205
ii = 206
ii = 207
ii = 208

Instance (of class *high*):
(0, 2, 0, 1., 15, 0, 1)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_c

ii = 259

Instance (of class *high*):
(0, 2, 1, 1., 14, 2, 3)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*
ii = 260

Instance (of class *low*):
(0, 2, 0, 1., 15, 0, 1)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *low*):
Distance: 1.0
    feature *race*: *0* -> *1*
ii = 261
ii = 262
ii = 263

Instance (of class *low*):
(0, 2, 2, 3., 15, 1, 3)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *race*: *2* -> *1*

Counterfactual instance (of class *high*):
Distance: 2.0
    feature *

ii = 308
ii = 309
ii = 310
ii = 311
ii = 312
ii = 313
ii = 314
ii = 315
ii = 316
ii = 317
ii = 318
ii = 319
ii = 320
ii = 321

Instance (of class *high*):
(0, 1, 0, 4., 12, 1, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *race*: *0* -> *1*
ii = 322
ii = 323
ii = 324
ii = 325
ii = 326

Instance (of class *high*):
(0, 2, 2, 4., 14, 2, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*

Counterfactual instance (of class *high*):
Distance: 3.0
    feature *age_cat*: *2* -> *0*
    feature *race*: *2* -> *1*

Counterfactual instance (of class *high*):
Distance: 3.0
    feature *age_cat*: *2* -> *1*
    feature *race*: *2* -> *0*
ii = 327
ii = 328
ii = 329
ii = 330
ii = 331
ii = 33

ii = 384

Instance (of class *high*):
(0, 2, 2, 4., 3, 1, 3)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *race*: *2* -> *1*

Counterfactual instance (of class *high*):
Distance: 2.0
    feature *age_cat*: *2* -> *1*
    feature *race*: *2* -> *1*
ii = 385
ii = 386
ii = 387
ii = 388
ii = 389
ii = 390
ii = 391
ii = 392

Instance (of class *high*):
(0, 2, 0, 3., 13, 5, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *low*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*

Counterfactual instance (of class *low*):
Distance: 1.0
    feature *race*: *0* -> *1*
ii = 393
ii = 394

Instance (of class *high*):
(0, 2, 0, 2., 7, 0, 4)

Feature names: (


Instance (of class *high*):
(0, 1, 2, 3., 15, 0, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *age_cat*: *1* -> *0*

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *race*: *2* -> *1*
ii = 442
ii = 443
ii = 444
ii = 445
ii = 446

Instance (of class *high*):
(0, 2, 0, 4., 12, 4, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *race*: *0* -> *1*
ii = 447
ii = 448
ii = 449

Instance (of class *high*):
(1, 2, 2, 0.5, 15, 1, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high

ii = 483
ii = 484

Instance (of class *high*):
(0, 2, 0, 3., 12, 7, 3)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *low*):
Distance: 2.0
    feature *age_cat*: *2* -> *0*
ii = 485

Instance (of class *high*):
(0, 2, 2, 3., 14, 6, 3)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *low*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*

Counterfactual instance (of class *low*):
Distance: 1.0
    feature *race*: *2* -> *1*

Counterfactual instance (of class *low*):
Distance: 2.0
    feature *age_cat*: *2* -> *1*
    feature *race*: *2* -> *1*
ii = 486
ii = 487
ii = 488
ii = 489
ii = 490
ii = 491
ii = 492
ii = 493
ii = 494
ii = 495
ii = 496
ii = 497
ii = 498
ii = 499

Instance (of class *high*):
(0, 2, 0, 4., 12, 1, 2)

Feature names: ('gender_female', 'age_cat', 'race',

ii = 542
ii = 543

Instance (of class *high*):
(0, 2, 0, 3., 14, 3, 3)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *low*):
Distance: 1.0
    feature *race*: *0* -> *1*
ii = 544
ii = 545

Instance (of class *low*):
(0, 2, 2, 3., 7, 1, 5)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*

Counterfactual instance (of class *high*):
Distance: 2.0
    feature *age_cat*: *2* -> *1*
    feature *race*: *2* -> *1*

Counterfactual instance (of class *high*):
Distance: 2.0
    feature *race*: *2* -> *0*
ii = 546
ii = 547

Instance (of class *high*):
(0, 1, 0, 1., 12, 2, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of cl

ii = 597
ii = 598
ii = 599
ii = 600
ii = 601
ii = 602
ii = 603

Instance (of class *low*):
(0, 2, 2, 2., 12, 0, 5)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *race*: *2* -> *1*

Counterfactual instance (of class *high*):
Distance: 2.0
    feature *age_cat*: *2* -> *0*

Counterfactual instance (of class *high*):
Distance: 2.0
    feature *age_cat*: *2* -> *1*
    feature *race*: *2* -> *1*
ii = 604
ii = 605
ii = 606

Instance (of class *high*):
(0, 2, 0, 3., 12, 3, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*
ii = 607
ii = 608
ii = 609
ii = 610
ii = 611
ii = 612
ii = 613
ii = 614
ii = 615
ii = 616
ii = 617
ii = 618
ii = 619
ii = 620
ii = 621

Instance (of class *high*):
(


Instance (of class *high*):
(0, 2, 0, 4., 12, 1, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 2.0
    feature *age_cat*: *2* -> *1*
    feature *race*: *0* -> *1*
ii = 656
ii = 657
ii = 658

Instance (of class *high*):
(0, 1, 0, 3., 14, 2, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *age_cat*: *1* -> *0*
ii = 659
ii = 660

Instance (of class *high*):
(0, 2, 0, 3., 15, 0, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *low*):
Distance: 1.0
    feature *race*: *0* -> *1*
ii = 661
ii = 662
ii = 663
ii = 664

Instance (of class *high*):
(0, 2, 0, 3., 3, 1, 2)

Feature names: ('gender_female', 'age_


Instance (of class *high*):
(0, 2, 2, 3., 15, 0, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *low*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*

Counterfactual instance (of class *low*):
Distance: 1.0
    feature *race*: *2* -> *1*
ii = 714

Instance (of class *low*):
(0, 2, 2, 3., 12, 1, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*
ii = 715
ii = 716
ii = 717

Instance (of class *low*):
(0, 2, 0, 4., 14, 3, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *low*):
Distance: 1.0
    feature *age_cat*: *2* -> *1*
ii = 718
ii = 719
ii = 720
ii = 721
ii = 722
ii = 723
ii = 724

Instance (of cl


Instance (of class *low*):
(0, 2, 2, 1., 12, 1, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 2.0
    feature *age_cat*: *2* -> *1*
    feature *race*: *2* -> *1*
ii = 775

Instance (of class *low*):
(0, 2, 2, 3., 14, 4, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 2.0
    feature *race*: *2* -> *0*

Counterfactual instance (of class *high*):
Distance: 3.0
    feature *age_cat*: *2* -> *0*
    feature *race*: *2* -> *1*
ii = 776
ii = 777
ii = 778
ii = 779
ii = 780
ii = 781
ii = 782
ii = 783
ii = 784
ii = 785
ii = 786
ii = 787
ii = 788
ii = 789
ii = 790
ii = 791
ii = 792

Instance (of class *high*):
(0, 2, 2, 1., 14, 3, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits',


Instance (of class *high*):
(0, 2, 0, 4., 14, 4, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
    feature *race*: *0* -> *1*

Counterfactual instance (of class *high*):
Distance: 2.0
    feature *age_cat*: *2* -> *0*

Counterfactual instance (of class *high*):
Distance: 2.0
    feature *age_cat*: *2* -> *1*
    feature *race*: *0* -> *1*
ii = 837
ii = 838
ii = 839
ii = 840

Instance (of class *high*):
(0, 2, 0, 4., 14, 3, 4)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *low*):
Distance: 1.0
    feature *race*: *0* -> *1*
ii = 841

Instance (of class *low*):
(0, 2, 1, 1., 13, 2, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *hig

ii = 885

Instance (of class *high*):
(0, 2, 2, 3., 14, 4, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 2.0
    feature *race*: *2* -> *0*

Counterfactual instance (of class *high*):
Distance: 3.0
    feature *age_cat*: *2* -> *0*
    feature *race*: *2* -> *1*
ii = 886
ii = 887
ii = 888
ii = 889
ii = 890
ii = 891

Instance (of class *high*):
(0, 2, 2, 1., 15, 7, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *low*):
Distance: 2.0
    feature *age_cat*: *2* -> *0*
ii = 892
ii = 893
ii = 894
ii = 895
ii = 896
ii = 897

Instance (of class *high*):
(0, 2, 1, 3., 14, 3, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 2

ii = 944
ii = 945
ii = 946
ii = 947
ii = 948

Instance (of class *low*):
(0, 2, 2, 0.5, 6, 3, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 2.0
    feature *age_cat*: *2* -> *1*
    feature *race*: *2* -> *1*
ii = 949

Instance (of class *high*):
(0, 2, 0, 4., 15, 11, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *low*):
Distance: 2.0
    feature *age_cat*: *2* -> *0*

Counterfactual instance (of class *low*):
Distance: 2.0
    feature *age_cat*: *2* -> *1*
    feature *race*: *0* -> *1*
ii = 950
ii = 951
ii = 952

Instance (of class *low*):
(0, 2, 0, 2., 15, 0, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 1.0
 

ii = 994
ii = 995
ii = 996
ii = 997

Instance (of class *high*):
(0, 1, 2, 2., 15, 0, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *low*):
Distance: 3.0
    feature *age_cat*: *1* -> *0*
    feature *race*: *2* -> *0*
ii = 998
ii = 999

Instance (of class *high*):
(0, 2, 0, 4., 14, 1, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *high*):
Distance: 2.0
    feature *age_cat*: *2* -> *0*
301


In [25]:
ii

999

In [26]:
ct

301

In [14]:
cfs_text = fatf_cf.textualise_counterfactuals(
    data_point,
    cfs,
    instance_class=data_point_y,
    counterfactuals_distances=cfs_distances,
    counterfactuals_predictions=cfs_classes)
print('\n{}'.format(cfs_text))


Instance (of class *high*):
(0, 2, 0, 4., 14, 18, 2)

Feature names: ('gender_female', 'age_cat', 'race', 'off_1_prs_max', 'off_1_gs_max', 'prior_commits', 'ic_institut_adj')

Counterfactual instance (of class *low*):
Distance: 1.5
    feature *age_cat*: *2* -> *1.5*
    feature *race*: *0* -> *1.0*

Counterfactual instance (of class *low*):
Distance: 1.5
    feature *race*: *0* -> *1.5*

Counterfactual instance (of class *low*):
Distance: 1.5
    feature *gender_female*: *0* -> *0.5*
    feature *race*: *0* -> *1.0*


(0, 2, 0, 4., 15, 2, 3)

'high'

In [16]:
cfs

array([], shape=(0, 0), dtype=float64)

### Train the model

In [3]:
X = data.drop("ic_custdy_level", axis=1)
Y = data["ic_custdy_level"]

data_array = X.values
ground_truth = Y.values

#rf = rf_models.BasicRFModel(X, Y)
#rf.fit()

from sklearn.ensemble import RandomForestClassifier

rf2 = RandomForestClassifier()
rf2.fit(data_array, ground_truth)


RandomForestClassifier()

### Fairness for a single point

In [14]:
# Select a data point to evaluate its counterfactual fairness
data_point_index = 4 + 2
data_point = X.iloc[data_point_index : data_point_index+1, :]
data_point_y = Y[data_point_index : data_point_index+1]


data_point.loc[X.index[data_point_index],'gender_female']

# Select a set of protected features
#protected_features = ['gender_female', 'race', 'age_cat']
protected_features = [0,1,2]

# Print out the protected features
assert protected_features, 'The protected features list cannot be empty.'

person = ' is' if len(protected_features) == 1 else 's are'
print('The following feature{} considered protected:'.format(person))
for feature_name in protected_features:
    print('    * "{}".'.format(feature_name))

# Print the instance
print('\nEvaluating counterfactual fairness of a data point (index {}) of '
      'class *{}* with the following features:'.format(data_point_index,
                                                       data_point_y.name))
for feature_name in data_point.columns.values.tolist():
    print('    * The feature *{}* has value: {}.'.format(
        feature_name, data_point.loc[X.index[data_point_index],feature_name]))


The following features are considered protected:
    * "0".
    * "1".
    * "2".

Evaluating counterfactual fairness of a data point (index 6) of class *ic_custdy_level* with the following features:
    * The feature *gender_female* has value: 0.
    * The feature *age_cat* has value: 0.
    * The feature *race* has value: 0.
    * The feature *off_1_prs_max* has value: 4.
    * The feature *off_1_gs_max* has value: 14.
    * The feature *prior_commits* has value: 2.
    * The feature *ic_institut_adj* has value: 2.


In [23]:
cfs, cfs_distances, cfs_classes = fatf_pfm.counterfactual_fairness(
    instance=data_point.values,
    protected_feature_indices=protected_features,
    model=rf2,
    default_numerical_step_size=.5,
    dataset=data_array)



IncorrectShapeError: The instance to be explained should be a 1-dimensional numpy array or a row of a structured array (numpy.void).

In [15]:
np.array([1,2])

array([1, 2])

In [18]:
type(data_point.values)

numpy.ndarray

In [24]:
type(data_point.values)

numpy.ndarray