In [92]:
# importing necessary packages

import pickle
import os
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn import model_selection, neural_network, linear_model, cluster

### Loading the Credit Risk Dataset

In [173]:
# importing the adult dataset, it is split into train and test by default
data = pd.read_csv('credit_risk/credit_risk_dataset.csv', header=1,names=['age', 'income', 'home_ownership', 
              'employment_length', 'loan_intent', 'loan_grade', 'loan_amount', 'loan_int_rate','loan_status', 
              'loan_income_ratio', 'historical_default', 'cred_hist_length',]) # dim: (32581 x 12)
data.head()

Unnamed: 0,age,income,home_ownership,employment_length,loan_intent,loan_grade,loan_amount,loan_int_rate,loan_status,loan_income_ratio,historical_default,cred_hist_length
0,22,59000,RENT,12.0,PERSONAL,D,35000,16.02,1,0.59,Y,3
1,21,9600,OWN,5.0,EDUCATION,B,1000,11.14,0,0.1,N,2
2,25,9600,MORTGAGE,1.0,MEDICAL,C,5500,12.87,1,0.57,N,3
3,23,65500,RENT,4.0,MEDICAL,C,35000,15.23,1,0.53,N,2
4,24,54400,RENT,8.0,MEDICAL,C,35000,14.27,1,0.55,Y,4


### Data Preprocessing

In [174]:
#  checking for missign values
data.isnull().sum()

age                      0
income                   0
home_ownership           0
employment_length      895
loan_intent              0
loan_grade               0
loan_amount              0
loan_int_rate         3116
loan_status              0
loan_income_ratio        0
historical_default       0
cred_hist_length         0
dtype: int64

In [175]:
#drop rows with missing values

data = data.dropna(axis=0)
print(data.shape)           # new dim: (28638, 12)

(28638, 12)


In [176]:
# changing 'historical_deafault' boolean values to numerical binary, Y=1, N=0

data['historical_default'] = data['historical_default'].replace(['Y','N'], [1,0])
# data.head(10)

In [177]:
#scatter plot matrix of data to get a sense of outliers

fig = px.scatter_matrix(data, dimensions=["age","income","employment_length","loan_amount","loan_int_rate","loan_income_ratio"],
       labels={col:col.replace('_', ' ') for col in data.columns}, height=900, color="loan_status", 
        color_continuous_scale=px.colors.diverging.Tealrose)
fig.show()

In [178]:
#Removing outliers
data = data[data['age']<=100]
data = data[data['income']<= 4000000]
data = data[data['employment_length']<=100]

print(data.shape)    # new dim: (28633, 12)

(28633, 12)


In [179]:
#Percentage of non-default cases
non_default = data[data.loan_status == 0].loan_status.count() / data.loan_status.count()
print("Non default %: ", non_default * 100)

Non default %:  78.33618552020395


In [180]:
#Box plot
fig = px.box(data, x="loan_grade", y="loan_income_ratio", color="loan_status",
       color_discrete_sequence=px.colors.qualitative.Dark24,labels={col:col.replace('_', ' ') for col in data.columns}, 
       category_orders={"loan_grade":["A","B","C","D","E","F","G"]})
fig.update_layout(legend=dict(orientation="h", yanchor="bottom",y=1.02, xanchor="right", x=1))
fig.show()

In [181]:
#Parallel category diagram
fig = px.parallel_categories(data, dimensions= ['home_ownership', 'loan_intent', "loan_grade", 'historical_default'], color_continuous_scale=px.colors.diverging.Tealrose, color="loan_status", 
        labels={col:col.replace('_', ' ') for col in data.columns})
fig.show()

In [182]:
#changing rest of the categorical variable to indicator varaible

# dim: (28638, 12)
# for each column with categorical varaibles
for col in ['home_ownership', 'loan_intent', 'loan_grade']:
    
    prefix_col = col                     # set prefix of new column names
    
    # converting categorical varaibles into indicator variables
    data = pd.concat([data, pd.get_dummies(data[col], prefix=prefix_col, drop_first=False)], axis=1)
    data = data.drop(col, axis=1)

print(data.shape)    # new dimensions: (28633 x 26)
print(list(data.columns))

(28633, 26)
['age', 'income', 'employment_length', 'loan_amount', 'loan_int_rate', 'loan_status', 'loan_income_ratio', 'historical_default', 'cred_hist_length', 'home_ownership_MORTGAGE', 'home_ownership_OTHER', 'home_ownership_OWN', 'home_ownership_RENT', 'loan_intent_DEBTCONSOLIDATION', 'loan_intent_EDUCATION', 'loan_intent_HOMEIMPROVEMENT', 'loan_intent_MEDICAL', 'loan_intent_PERSONAL', 'loan_intent_VENTURE', 'loan_grade_A', 'loan_grade_B', 'loan_grade_C', 'loan_grade_D', 'loan_grade_E', 'loan_grade_F', 'loan_grade_G']


In [183]:
# move the loan_status to the end of the columns as it is our y-values

y_values = data.pop('loan_status')
data.insert(25,'loan_status', y_values)
print(data.shape)

(28633, 26)


In [184]:
data.reset_index(inplace=True,drop=True)
data.tail(10)

Unnamed: 0,age,income,employment_length,loan_amount,loan_int_rate,loan_income_ratio,historical_default,cred_hist_length,home_ownership_MORTGAGE,home_ownership_OTHER,...,loan_intent_PERSONAL,loan_intent_VENTURE,loan_grade_A,loan_grade_B,loan_grade_C,loan_grade_D,loan_grade_E,loan_grade_F,loan_grade_G,loan_status
28623,60,45600,1.0,20000,10.0,0.44,0,26,0,0,...,0,1,0,1,0,0,0,0,0,1
28624,52,52000,0.0,9600,8.49,0.18,0,22,0,0,...,1,0,1,0,0,0,0,0,0,0
28625,56,90000,0.0,7200,6.17,0.08,0,19,1,0,...,1,0,1,0,0,0,0,0,0,0
28626,52,65004,4.0,20000,15.58,0.31,1,19,0,0,...,1,0,0,0,0,1,0,0,0,1
28627,52,64500,0.0,5000,11.26,0.08,0,20,0,0,...,0,0,0,1,0,0,0,0,0,0
28628,57,53000,1.0,5800,13.16,0.11,0,30,1,0,...,1,0,0,0,1,0,0,0,0,0
28629,54,120000,4.0,17625,7.49,0.15,0,19,1,0,...,1,0,1,0,0,0,0,0,0,0
28630,65,76000,3.0,35000,10.99,0.46,0,28,0,0,...,0,0,0,1,0,0,0,0,0,1
28631,56,150000,5.0,15000,11.48,0.1,0,26,1,0,...,1,0,0,1,0,0,0,0,0,0
28632,66,42000,2.0,6475,9.99,0.15,0,30,0,0,...,0,0,0,1,0,0,0,0,0,0


In [185]:
# splitting the dataset into x, y and then train, test
Y = data['loan_status']
X = data.drop('loan_status',axis=1)
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, Y, test_size=.2, shuffle=True, random_state=42)

x_train = x_train.reset_index(drop=True)   
x_test = x_test.reset_index(drop=True) 
y_train = y_train.reset_index(drop=True)   
y_test = y_test.reset_index(drop=True) 

x_train.shape, y_train.shape, x_test.shape, y_test.shape

((22906, 25), (22906,), (5727, 25), (5727,))

In [186]:
# print the values count of loan_status

print(y_train.value_counts(normalize=True))
print(y_test.value_counts(normalize=True))

0    0.78176
1    0.21824
Name: loan_status, dtype: float64
0    0.789768
1    0.210232
Name: loan_status, dtype: float64


### We won't balance the dataset because real-world dataset will also have more number of non-default borrowers than the default borrowers.

In [4]:
# converting the pandas dataframe to numpy array
total_np = total.to_numpy()

# D has 7841 samples, D_aux has 7841 samples, and D_test has 7692 samples

# taking '>50K' income as y values
y = (total_np[:, -1] + total_np[:, -2]).astype(np.float32) # last two columns are duplicate, so add them 
y = np.delete(y, 32561, axis=0)                            # delete the 32561th row value for y as it has NaN values

# taking rest of the data as x values, after deleting the last three columns i.e columns
x = np.delete(total_np, [total_np.shape[1]-1, total_np.shape[1]-2, total_np.shape[1]-3], axis=1)
x = np.delete(x, 32561, axis=0).astype(np.float32)         # delete the 32561th row value for x as it has NaN vlaues


#separating the dataset into training and testing, training [0-32560] row, testing [32561-rest]
train_x, train_y = x[:32561], y[:32561]     # dim: (32561 x 57)
test_x, test_y = x[32561:], y[32561:]       # dim: (16281 x 57)

# saving the index of samples where y is true(1) and false(0)
train_zero_inds = np.where(train_y==0)[0]   # dim: (24720 x 1)
train_one_inds = np.where(train_y==1)[0]    # dim: (7841 x 1)
test_zero_inds = np.where(test_y==0)[0]     # dim: (12435 x 1)
test_one_inds = np.where(test_y==1)[0]      # dim: (3846 x 1)

# creating an array of random numbers, range[0-24720(train_zero_inds.shape[0])], of dimension (7841(train_one_inds.shape[0]), 1) 
train_zeros = np.random.choice(train_zero_inds.shape[0], train_one_inds.shape[0], replace=False) # dim:(7841 x 1)

# creating an array of random numbers, range[0-12435(test_zero_inds.shape[0])], of dimension (3846(test_one_inds.shape[0]), 1)
test_zeros = np.random.choice(test_zero_inds.shape[0], test_one_inds.shape[0], replace=False) # dim: (3846 x 1)


# concatenating random choices of zero indexed example with one indexed example to build a dataset with 
# equal number of zero and one indexed samples
train_x = np.concatenate((train_x[train_zeros], train_x[train_one_inds]), axis=0) # dim: (15682 x 57)
train_y = np.concatenate((train_y[train_zeros], train_y[train_one_inds]), axis=0) # dim: (15682 x 1)


test_x = np.concatenate((test_x[test_zeros], test_x[test_one_inds]), axis=0)      # dim: (7692 x 57)
test_y = np.concatenate((test_y[test_zeros], test_y[test_one_inds]), axis=0)      # dim: (7692 x 1)


# shuffle training data(row wise) by shuffling the index and getting the data from shuffled index
train_shuffle = np.random.choice(train_x.shape[0], train_x.shape[0], replace=False) # dim: (15682 x 1)
train_x, train_y = train_x[train_shuffle], train_y[train_shuffle]                   # dim: (15682 x 57) and (15682 x 1)

train_size = train_x.shape[0]//2

train_x, train_y, ho_x, ho_y = train_x[:train_size], train_y[:train_size], train_x[train_size:], train_y[train_size:]

pois_rates = [0.5, 1, 2]     # poison rates
#pois_rates = [0.5, 1, 1.5, 2, 2.5, 3]

# D: (train_x, train_y)
# D_aux : (ho_x, ho_y)
# D_test: (test_x, test_y)
train_x.shape, train_y.shape, ho_x.shape, ho_x.shape, test_x.shape, test_y.shape

((7841, 57), (7841,), (7841, 57), (7841, 57), (7692, 57), (7692,))

### Training Clean LR and NN

In [187]:
# training and testing the linear regression model
lm = linear_model.LogisticRegression(max_iter = 5000)
lm.fit(x_train, y_train)
clean_lm_test_acc = lm.score(x_test, y_test)
print(clean_lm_test_acc)

0.8128164833246028


In [109]:
# print(lm.classes_) # two classes '0' & '1'

# predict the output probability for ho_x dataset. predict_proba gives probability for each class i.e a tuple 
# in this case [0.1 0.9] probaibility for this class. np.eye(2) gets [1 0] when y = 0 and [0 1] when y = 1
# which when muliplied singles out the probability
lm_preds = np.multiply(lm.predict_proba(ho_x), np.eye(2)[ho_y.astype(np.int)]).sum(axis=1) # dim: (7841, 1)

# could have simply done predict
lm_pred_class = lm.predict(ho_x)                    # dim: (7841, 1)

print(lm_preds) # print the probabilites but has no information on which class it belongs to
print(lm_pred_class)  # print the predictions, gives classes


NameError: name 'ho_x' is not defined

In [189]:
# training and test the NN with 3 hidden layers 1st & 2nd hidden layer: 120 neurons, 3rd hidden layer: 60 neurons
nn = neural_network.MLPClassifier(hidden_layer_sizes=(120,120,60,),random_state=42)
nn.fit(x_train, y_train)
clean_nn_test_acc = nn.score(x_test, y_test)
print(clean_nn_test_acc)

0.8379605378033874


In [9]:
# prediction for the nn

# probability of belonging to a clas

nn_preds = np.multiply(nn.predict_proba(ho_x), np.eye(2)[ho_y.astype(np.int)]).sum(axis=1) 
nn_pred_class = nn.predict(ho_x) # prediction class

print(nn_preds)
print(nn_pred_class)

[0.97376835 0.9968766  0.08229731 ... 0.96648937 0.99939501 0.75527519]
[0. 1. 0. ... 0. 1. 0.]


In [10]:
pred_mean = np.multiply(nn.predict_proba(test_x), np.eye(2)[test_y.astype(int)]).mean()*2
print(pred_mean)

0.7632116561694878


In [192]:
x_train['loan_grade_G'].value_counts()

0    22862
1       44
Name: loan_grade_G, dtype: int64

In [199]:
x_train[x_train['loan_grade_G']==1].index

Int64Index([  311,   854,  1571,  2256,  2462,  3569,  3768,  4382,  4619,
             4651,  5884,  6006,  8050,  8085,  8459,  8463,  9179,  9523,
             9953, 10073, 10198, 11574, 12138, 12200, 12344, 12601, 14422,
            15216, 15582, 17074, 17547, 17781, 18279, 18927, 19092, 19193,
            19859, 20005, 20455, 21982, 22004, 22070, 22077, 22789],
           dtype='int64')

In [198]:
x_test[x_test['loan_grade_G']==1]

Unnamed: 0,age,income,employment_length,loan_amount,loan_int_rate,loan_income_ratio,historical_default,cred_hist_length,home_ownership_MORTGAGE,home_ownership_OTHER,...,loan_intent_MEDICAL,loan_intent_PERSONAL,loan_intent_VENTURE,loan_grade_A,loan_grade_B,loan_grade_C,loan_grade_D,loan_grade_E,loan_grade_F,loan_grade_G
26,37,88000,4.0,25000,19.66,0.28,1,15,0,0,...,0,0,0,0,0,0,0,0,0,1
375,23,113000,0.0,19750,21.21,0.17,0,4,1,0,...,0,0,1,0,0,0,0,0,0,1
452,26,96000,2.0,10000,21.14,0.1,0,3,1,0,...,0,0,1,0,0,0,0,0,0,1
768,27,39600,4.0,2500,20.86,0.06,1,8,1,0,...,0,0,1,0,0,0,0,0,0,1
821,28,118000,6.0,25000,20.11,0.21,1,6,1,0,...,0,1,0,0,0,0,0,0,0,1
932,31,84000,3.0,25000,17.34,0.3,1,5,1,0,...,0,0,0,0,0,0,0,0,0,1
1882,29,54000,1.0,25000,20.16,0.46,1,8,1,0,...,0,1,0,0,0,0,0,0,0,1
1937,24,37680,8.0,15000,19.82,0.4,0,2,0,0,...,0,0,0,0,0,0,0,0,0,1
2931,31,144000,0.0,13000,21.21,0.09,0,6,0,0,...,0,0,0,0,0,0,0,0,0,1
3132,24,88000,2.0,25000,18.84,0.28,0,2,1,0,...,0,0,0,0,0,0,0,0,0,1


# FeatureMatch

### Data preprocessing

In [11]:
# FeatureMatch Data Preprocessing

np.random.seed(0)
# concatenating education level and race for D_aux data
protected = np.concatenate((ho_x[:, 12:27], ho_x[:, 52:57]), axis=1) # dim: (7841, 20)

# concatenating education level and race for D_test data
test_prot = np.concatenate((test_x[:, 12:27], test_x[:, 52:57]), axis=1) # dim: (7692, 20)

# concatenating education level and race for D_train data
train_prot = np.concatenate((train_x[:, 12:27], train_x[:, 52:57]), axis=1) # dim: (7841, 20)

all_cols = list(total.columns)                # getting names of all columns
prot_cols = all_cols[12:27] + all_cols[52:57] # getting names of protected columns

subclasses, counts = np.unique(protected, axis=0, return_counts=True)# 122 unique examples in the protected data 
# dim: (122 x 20), (122,1)
# print(tuple(zip(subclasses, counts)))
# print(subclasses[0:5], counts[0:5])

hd_sbcl_conf = []                             # empty array
hd_used = []                                  # empty array

# declaring arrays for storing errors in the subclasses, i.e [subclasses, (clean_acc, collat, target), pois_ind]

#  dim: (122 x 3 x len: pois rates) for a class (3 x pois len) dimensional array
hd_lr_errs = np.zeros((len(subclasses), 3, len(pois_rates)))
#  dim: (122 x 3 x len: pois rates) for a class (3 x 3 pois len) dimensional array
hd_nn_errs = np.zeros((len(subclasses), 3, len(pois_rates)))  


### FeatureMatch: Poisoning the data and calculating error rates

In [12]:
# Feature Match: Initially there are 122 subclasses of common features, later if senctences filters the subclasses 
# to number to 35

for i, (subcl, count) in enumerate(zip(subclasses, counts)):  # for each subclass in subclasses
    if count > 10 and count < 100:                            # if the number of counts is more than 10 and less than 100
        hd_used.append((i, count))                            # mark if by apppend to hd_used
        
        print("\n")
        print("Subclass Index: %d, Subclass Count: %d " % (i, count)) # print subclass index and count
      
        # subtract the current subclass from test_prot data, find frobenius norm along columns and then get index of
        # data where the norm is still zero. This finds the samples in test_prot (D_test) which have identical features 
        # to current subclass
        test_sbcl = np.where(np.linalg.norm(test_prot - subcl, axis=1)==0) # 2D array of (indexes, datatypes)
        
        # same logic as above, finding the samples in protected data (D_aux) which have identical features to current subclass
        
        sbcl = np.where(np.linalg.norm(protected-subcl, axis=1)==0)   # 2D array of (indexes, datatypes) Note: number of index
                                                                      # should be equal to counts value
        
        # same logic as above, finding the samples in protected data (D_train) which have identical features to current subclass
        train_sbcl = np.where(np.linalg.norm(train_prot - subcl, axis=1)==0) # 2D array of (indexes, datatypes)
        
        # getting the samples with idential feature (to current subclass) from test data (D_test)
        p_t_x, p_t_y = test_x[test_sbcl], test_y[test_sbcl] 
        
        # getting the samples with idential feature (to current subclass) from auxiliary data (D_aux)
        # labelling it as poison data
        pois_x_base, pois_y_base = ho_x[sbcl], ho_y[sbcl]  
        
        # getting the prediction probability of identical samples from(D_aux), and finding their mean
        sc_lr_pred, sc_nn_pred = lm_preds[sbcl].mean(), nn_preds[sbcl].mean()
        print("Sc_lr_pred: %f, Sc_nn_pred: %f " % (sc_lr_pred, sc_nn_pred))
        
        train_ct = train_sbcl[0].shape[0] # number of identical samples in train_sbcl (D_train)
        test_ct = p_t_x.shape[0]          # number of identical samples in test_sbcl (D_test) 
        
        # multiplying prediction probability [x y] of a sample with [0 1] or [1 0] based on y values
        # taking their sum(mean) and multiplying with 2
        hd_sbcl_conf.append(2*np.multiply(lm.predict_proba(p_t_x), np.eye(2)[p_t_y.astype(int)]).mean())
        
#         all_errs = []

        # multiply poison rates with number of identical training samples, then take it as integer, dim same as pois rate, 1D array
        # pois_ct now has the number of poisoned samples which needs to be added to D_train
        # for each number(pois_ct) in the pois_rates 
        for j, pois_ct in enumerate([int(train_ct*pois_rate) for pois_rate in pois_rates]):

            # get the random indexes of 'pois_ct' number of samples from pois_x_base with replacement (repeated data is allowed)
            pois_inds = np.random.choice(pois_x_base.shape[0], pois_ct, replace=True)

            # get the x_values and flip the y values. Now we have the poison data
            pois_x, pois_y = pois_x_base[pois_inds], 1 - pois_y_base[pois_inds]

            # add the poisoned data to the training data
            total_x, total_y = np.concatenate((train_x, pois_x), axis=0), np.concatenate((train_y, pois_y), axis=0)

            # print the column name for which the value (v > 0.5) is 1 basically, then print the number of samples
            # in training, poisoned, and testing data. This finally prints which features we are poisoning
            
            print([prot_col for v, prot_col in zip(subcl, prot_cols) if v > 0.5], train_ct, pois_ct, test_ct)
            print("poison fraction:", pois_ct/train_ct) # printing the poison fraction

            # creating three Logistic regression model so that we could train three models and average their results
            lmps = [linear_model.LogisticRegression(solver='liblinear', max_iter=5000) for _ in range(3)] 
            for lmp in lmps:
                lmp.fit(total_x, total_y)

            # creating three Logistic regression model so that we could train three models on the Poisoned Data (total_x, total_y)
            nnps = [neural_network.MLPClassifier(hidden_layer_sizes=(10,), max_iter=3000) for _ in range(3)]
            for nnp in nnps:
                nnp.fit(total_x, total_y)

            # get the average accuracy score on test data (D_test) for three models of LR
            lmp_acc_colla = np.mean([lmp.score(test_x, test_y) for lmp in lmps])
            print("Poisoned lr test acc {:.3f}".format(lmp_acc_colla))

            # get the average accuracy score on test data (D_test) for three models of NN
            nnp_acc_colla = np.mean([nnp.score(test_x, test_y) for nnp in nnps])
            print("Poisoned nn test acc {:.3f}".format(nnp_acc_colla))

            # checking the score for identical samples(p_t_x, p_t_y) in test data on Clean LR model
            lmc_sbc = lm.score(p_t_x, p_t_y)
            print("lr clean sbc {:.3f}".format(lmc_sbc))

            # checking the score for identical samples(p_t_x, p_t_y) in test data on Poisoned LR model
            lmp_sbc_itest = np.mean([lmp.score(p_t_x, p_t_y) for lmp in lmps])
            print("lr poisoned sbc {:.3f}".format(lmp_sbc_itest))

            # checking the score for identical samples(p_t_x, p_t_y) in test data on Clean NN model
            nnc_sbc = nn.score(p_t_x, p_t_y)
            print("nn clean  sbc {:.3f}".format(nnc_sbc))

            # checking the score for identical samples(p_t_x, p_t_y) in test data on Clean NN model
            nnp_sbc_itest = np.mean([nnp.score(p_t_x, p_t_y) for nnp in nnps])
            print("nn poisoned sbc {:.3f}".format(nnp_sbc_itest))

            # storing the errors as (subclass, (row0: clean_acc, row1: collat, row2: target), (pois_ind))
            hd_lr_errs[i, 0, j] = lmc_sbc
            hd_lr_errs[i, 1, j] = lmp_acc_colla
            hd_lr_errs[i, 2, j] = lmp_sbc_itest

            hd_nn_errs[i, 0, j] = nnc_sbc
            hd_nn_errs[i, 1, j] = nnp_acc_colla
            hd_nn_errs[i, 2, j] = nnp_sbc_itest



Subclass Index: 7, Subclass Count: 32 
Sc_lr_pred: 0.797437, Sc_nn_pred: 0.744064 
['education_16.0', 'race_ White'] 33 16 20
poison fraction: 0.48484848484848486
Poisoned lr test acc 0.832
Poisoned nn test acc 0.826
lr clean sbc 0.800
lr poisoned sbc 0.800
nn clean  sbc 0.800
nn poisoned sbc 0.800
['education_16.0', 'race_ White'] 33 33 20
poison fraction: 1.0
Poisoned lr test acc 0.836
Poisoned nn test acc 0.833
lr clean sbc 0.800
lr poisoned sbc 0.700
nn clean  sbc 0.800
nn poisoned sbc 0.817
['education_16.0', 'race_ White'] 33 66 20
poison fraction: 2.0
Poisoned lr test acc 0.821
Poisoned nn test acc 0.829
lr clean sbc 0.800
lr poisoned sbc 0.750
nn clean  sbc 0.800
nn poisoned sbc 0.817


Subclass Index: 12, Subclass Count: 14 
Sc_lr_pred: 0.896783, Sc_nn_pred: 0.891565 
['education_16.0', 'race_ Asian-Pac-Islander', 'sex_ Male'] 12 6 18
poison fraction: 0.5
Poisoned lr test acc 0.836
Poisoned nn test acc 0.835
lr clean sbc 1.000
lr poisoned sbc 1.000
nn clean  sbc 1.000
nn poi

Poisoned lr test acc 0.836
Poisoned nn test acc 0.834
lr clean sbc 0.738
lr poisoned sbc 0.738
nn clean  sbc 0.738
nn poisoned sbc 0.728
['education_12.0', 'race_ White'] 66 66 65
poison fraction: 1.0
Poisoned lr test acc 0.822
Poisoned nn test acc 0.834
lr clean sbc 0.738
lr poisoned sbc 0.723
nn clean  sbc 0.738
nn poisoned sbc 0.754
['education_12.0', 'race_ White'] 66 132 65
poison fraction: 2.0
Poisoned lr test acc 0.814
Poisoned nn test acc 0.830
lr clean sbc 0.738
lr poisoned sbc 0.723
nn clean  sbc 0.738
nn poisoned sbc 0.723


Subclass Index: 43, Subclass Count: 11 
Sc_lr_pred: 0.771231, Sc_nn_pred: 0.797885 
['education_12.0', 'race_ Black', 'sex_ Male'] 15 7 4
poison fraction: 0.4666666666666667
Poisoned lr test acc 0.835
Poisoned nn test acc 0.828
lr clean sbc 1.000
lr poisoned sbc 1.000
nn clean  sbc 1.000
nn poisoned sbc 1.000
['education_12.0', 'race_ Black', 'sex_ Male'] 15 15 4
poison fraction: 1.0
Poisoned lr test acc 0.837
Poisoned nn test acc 0.834
lr clean sbc 1.00

Poisoned lr test acc 0.824
Poisoned nn test acc 0.830
lr clean sbc 0.718
lr poisoned sbc 0.641
nn clean  sbc 0.744
nn poisoned sbc 0.615


Subclass Index: 82, Subclass Count: 41 
Sc_lr_pred: 0.867468, Sc_nn_pred: 0.862854 
['education_7.0', 'race_ White'] 36 18 42
poison fraction: 0.5
Poisoned lr test acc 0.832
Poisoned nn test acc 0.835
lr clean sbc 0.952
lr poisoned sbc 0.952
nn clean  sbc 0.881
nn poisoned sbc 0.881
['education_7.0', 'race_ White'] 36 36 42
poison fraction: 1.0
Poisoned lr test acc 0.820
Poisoned nn test acc 0.832
lr clean sbc 0.952
lr poisoned sbc 0.833
nn clean  sbc 0.881
nn poisoned sbc 0.889
['education_7.0', 'race_ White'] 36 72 42
poison fraction: 2.0
Poisoned lr test acc 0.824
Poisoned nn test acc 0.822
lr clean sbc 0.952
lr poisoned sbc 0.833
nn clean  sbc 0.881
nn poisoned sbc 0.833


Subclass Index: 83, Subclass Count: 89 
Sc_lr_pred: 0.753493, Sc_nn_pred: 0.769549 
['education_7.0', 'race_ White', 'sex_ Male'] 118 59 107
poison fraction: 0.5
Poisoned lr t

Poisoned lr test acc 0.834
Poisoned nn test acc 0.833
lr clean sbc 0.667
lr poisoned sbc 0.600
nn clean  sbc 0.800
nn poisoned sbc 0.622
['education_2.0', 'race_ White', 'sex_ Male'] 11 22 15
poison fraction: 2.0
Poisoned lr test acc 0.837
Poisoned nn test acc 0.833
lr clean sbc 0.667
lr poisoned sbc 0.467
nn clean  sbc 0.800
nn poisoned sbc 0.289


# ClusterMatch

###  Data preprocessing

In [47]:
#Cluster match Data preprocessing

np.random.seed(0)

from sklearn import cluster
km = cluster.KMeans(n_clusters=100)   # KMeans with 100 clusters
km.fit(ho_x)                          # fit the ho_x (Data_aux)

# dim of cluster_centers_ : (100, 57)

test_km = km.predict(test_x)          # predict the cluster centers for test dataset (D_test), (7692 x 1)

train_km = km.predict(train_x)        # predict the cluster centers for test dataset (D_train), (7841 x 1)

kd_sbcl_conf = []
kd_used = []

# declaring arrays for storing errors in the subclasses, i.e [subclasses, (clean_acc, collat, target), pois_ind]

#  dim: (122 x 3 x len: pois rates) for a class (3 x pois len) dimensional array
kd_lr_errs = np.zeros((len(subclasses), 3, len(pois_rates))) 

#  dim: (122 x 3 x len: pois rates) for a class (3 x pois len) dimensional array
kd_nn_errs = np.zeros((len(subclasses), 3, len(pois_rates))) 

kmeans_designed = []
cl_inds, cl_cnts = np.unique(km.labels_, return_counts=True) # cl_inds has cluster center index, and count in a cluster 
                                                            # given by cl_ctns
print("cluster indexes: ", cl_inds)
print("cluster count: ", cl_cnts)


cluster indexes:  [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]
cluster count:  [149 118  14 142   8 105  39  22  64  24  58   1  18  38   3  97   6   9
  27   6  15   1 487  13   4   5  57   5   3   2  14   1  17   1 311  48
   3   5  33   6   4   1   9   8 611  24  12   6   1 142   1   9 633   3
  88   1 294   8  12   1   3   7   3 494 134   7   1  11   2   4   5   2
  59  10   2   1  90   1 159 770  29   3  17 493 246   8 270  58   3 101
 857 107   2   2   3   8  18   1   2   1]


### Clustermatch: Poisoning and calculating the errors

In [48]:
# ClusterMatch: Initially there are 100 cluster, later if sentence filters the clusters 28 

for i, (cl_ind, cl_ct) in enumerate(zip(cl_inds, cl_cnts)): # for each cluster in D_aux data
    if cl_ct > 10 and cl_ct < 100:                            # select the cluster if it has more than 10 and less than 100 samples
        kd_used.append((i, cl_ct))                              # append the selected cluster
        
        print("\n")
        print("Cluster Index: %d, Cluster Count: %d, Test Samples: %d" % (cl_ind, cl_ct, np.where(test_km==cl_ind)[0].shape[0])) # print current cluster index, count and number of 
                                                                    # samples in test data belonging to current cluster index

        # getting the indexes of test samples that belong to current cluster index
        test_sbcl = np.where(test_km==cl_ind)

        # getting the indexes of aux data samples that belong to current cluster index
        sbcl = np.where(km.labels_==cl_ind)

        # getting the indexes of training samples that belong to current cluster index
        train_sbcl = np.where(train_km==cl_ind)

        # getting the test samples that belong to current cluster index
        p_t_x, p_t_y = test_x[test_sbcl], test_y[test_sbcl]

        # getting the aux samples that belong to current cluster index which is too be poisoned
        pois_x_base, pois_y_base = ho_x[sbcl], ho_y[sbcl]

        # getting the prediction probability of identical samples from(D_aux), and finding their mean
        sc_lr_pred, sc_nn_pred = lm_preds[sbcl].mean(), nn_preds[sbcl].mean()
        print(sc_lr_pred, sc_nn_pred)


        train_ct = train_sbcl[0].shape[0] # number of train samples that match current cluster index
        test_ct = p_t_x.shape[0]          # number of test samples that match current cluster index
        
        # multiplying prediction probability [x y] of a sample with [0 1] or [1 0] based on y values
        # taking their sum(mean) and multiplying with 2
        kd_sbcl_conf.append(2*np.multiply(lm.predict_proba(p_t_x),np.eye(2)[p_t_y.astype(int)]).mean())
        
#         all_errs = []
        
        # multiply poison rates with number of identical training samples, then take it as integer, dim same as pois rate, 1D array
        # pois_ct now has the number of poisoned samples which needs to be added to D_train
        # for each number(pois_ct) in the pois_rates 
        for j, pois_ct in enumerate([int(train_ct*pois_rate) for pois_rate in pois_rates]):
            
            # get the random indexes of 'pois_ct' number of samples from pois_x_base with replacement (repeated data is allowed)
            pois_inds = np.random.choice(pois_x_base.shape[0], pois_ct, replace=True)
            
            # get the x_values and flip the y values. Now we have the poison data
            pois_x, pois_y = pois_x_base[pois_inds], 1 - pois_y_base[pois_inds]
            
            # add the poisoned data to the training data
            total_x, total_y = np.concatenate((train_x, pois_x), axis=0), np.concatenate((train_y, pois_y), axis=0)
            
            print("poison fraction:", pois_ct/train_ct, train_ct, pois_ct, test_ct)
            
            # creating three Logistic regression model so that we could train three models and average their results
            lmps = [linear_model.LogisticRegression(solver='liblinear', max_iter=500) for _ in range(3)]
            for lmp in lmps:
                lmp.fit(total_x, total_y)
            
            # creating three Logistic regression model so that we could train three models on the Poisoned Data (total_x, total_y)
            nnps = [neural_network.MLPClassifier(hidden_layer_sizes=(10,), max_iter=3000) for _ in range(3)]
            for nnp in nnps:
                nnp.fit(total_x, total_y)
                
            # get the average accuracy score on test data (D_test) for three models of LR
            lmp_acc_col = np.mean([lmp.score(test_x, test_y) for lmp in lmps])
            print("Poisoned lr test acc {:.3f}".format(lmp_acc_col))
            
            # get the average accuracy score on test data (D_test) for three models of NN
            nnp_acc_col = np.mean([nnp.score(test_x, test_y) for nnp in nnps])
            print("Poisoned nn test acc {:.3f}".format(nnp_acc_col))
            
            # checking the score for identical samples(p_t_x, p_t_y) in test data on Clean LR model
            lmc_sbc = lm.score(p_t_x, p_t_y)
            print("lr clean sbc {:.3f}".format(lmc_sbc))
            
            # checking the score for identical samples(p_t_x, p_t_y) in test data on Poisoned LR model
            lmp_sbc_itst = np.mean([lmp.score(p_t_x, p_t_y) for lmp in lmps])
            print("lr poisoned sbc {:.3f}".format(lmp_sbc_itst))
            
            # checking the score for identical samples(p_t_x, p_t_y) in test data on Clean NN model
            nnc_sbc = nn.score(p_t_x, p_t_y)
            print("nn cl sbc {:.3f}".format(nnc_sbc))
            
            # checking the score for identical samples(p_t_x, p_t_y) in test data on Clean NN model
            nnp_sbc_itst = np.mean([nnp.score(p_t_x, p_t_y) for nnp in nnps])
            print("nn sbc {:.3f}".format(nnp_sbc_itst))
            
            # storing the errors as (subclass, (row0: clean_acc, row1: collat, row2: target), (pois_ind))
            kd_lr_errs[i, 0, j] = lmc_sbc
            kd_lr_errs[i, 1, j] = lmp_acc_col
            kd_lr_errs[i, 2, j] = lmp_sbc_itst
            
            kd_nn_errs[i, 0, j] = nnc_sbc
            kd_nn_errs[i, 1, j] = nnp_acc_col
            kd_nn_errs[i, 2, j] = nnp_sbc_itst



Cluster Index: 2, Cluster Count: 14, Test Samples: 16
0.9498838903232067 0.999999361378806
poison fraction: 0.5 20 10 16
Poisoned lr test acc 0.822
Poisoned nn test acc 0.813
lr clean sbc 1.000
lr poisoned sbc 1.000
nn cl sbc 1.000
nn sbc 0.750
poison fraction: 1.0 20 20 16
Poisoned lr test acc 0.821
Poisoned nn test acc 0.831
lr clean sbc 1.000
lr poisoned sbc 1.000
nn cl sbc 1.000
nn sbc 1.000
poison fraction: 2.0 20 40 16
Poisoned lr test acc 0.835
Poisoned nn test acc 0.790
lr clean sbc 1.000
lr poisoned sbc 1.000
nn cl sbc 1.000
nn sbc 0.333


Cluster Index: 6, Cluster Count: 39, Test Samples: 42
0.8813653877501798 0.8708135470365866
poison fraction: 0.48717948717948717 39 19 42
Poisoned lr test acc 0.836
Poisoned nn test acc 0.833
lr clean sbc 0.976
lr poisoned sbc 0.976
nn cl sbc 0.976
nn sbc 0.881
poison fraction: 1.0 39 39 42
Poisoned lr test acc 0.822
Poisoned nn test acc 0.832
lr clean sbc 0.976
lr poisoned sbc 0.976
nn cl sbc 0.976
nn sbc 0.770
poison fraction: 2.0 39 78 

Poisoned lr test acc 0.823
Poisoned nn test acc 0.833
lr clean sbc 1.000
lr poisoned sbc 0.778
nn cl sbc 1.000
nn sbc 1.000


Cluster Index: 35, Cluster Count: 48, Test Samples: 53
0.8889555504550101 0.8691794375578562
poison fraction: 0.5 58 29 53
Poisoned lr test acc 0.834
Poisoned nn test acc 0.830
lr clean sbc 0.943
lr poisoned sbc 0.943
nn cl sbc 0.981
nn sbc 0.925
poison fraction: 1.0 58 58 53
Poisoned lr test acc 0.834
Poisoned nn test acc 0.833
lr clean sbc 0.943
lr poisoned sbc 0.962
nn cl sbc 0.981
nn sbc 0.950
poison fraction: 2.0 58 116 53
Poisoned lr test acc 0.834
Poisoned nn test acc 0.824
lr clean sbc 0.943
lr poisoned sbc 0.962
nn cl sbc 0.981
nn sbc 0.704


Cluster Index: 38, Cluster Count: 33, Test Samples: 43
0.9203522571759143 0.8837927010926333
poison fraction: 0.48717948717948717 39 19 43
Poisoned lr test acc 0.836
Poisoned nn test acc 0.832
lr clean sbc 0.930
lr poisoned sbc 0.884
nn cl sbc 0.837
nn sbc 0.938
poison fraction: 1.0 39 39 43
Poisoned lr test acc 0.

In [49]:
# number of filters for FeatureMatch and ClusterMatch
print(len(hd_used), len(kd_used))


35 28


### Sorting the error rates and comparing the filter functions for both FeatureMatch and ClusterMatch


In [52]:
hd_lr_errs  # storing the errors as (subclass, (row0: clean_acc, row1: collat, row2: target), (pois_ind))
hd_nn_errs  # storing the errors as (subclass, (row0: clean_acc, row1: collat, row2: target), (pois_ind))

# computing error rates for FetureMatch, indexing of hd_used and hd_targets are same
hd_targets = np.zeros((len(hd_used), 3, 2)) # dim: (len(hd filter: 35) X 3(pois_ind) X 2). storing nn_errors

for j, ((i, count), conf) in enumerate(zip(hd_used, hd_sbcl_conf)): # for each subclasses(filter) in FeatureMatch
    for pois_ind in range(3):
        #print(i, count, conf, pois_ind)
        this_err = hd_nn_errs[i, :, pois_ind] # pick pois_ind column of (3 x pois_ind) matrix
        clean_acc, collat, target = this_err[0], this_err[1], this_err[2]
        
        # target is poisoned acc - clean acc for identical test samples in a subclass
        hd_targets[j, pois_ind, 0] = clean_acc - target  
        hd_targets[j, pois_ind, 1] = collat  # collat is poisoned acc on test dataser(D_test)

        
# computing error rates for ClusterMatch

# input: clean_nn test accuracy, poisoned_nn collat, frac of test samples belonging to current 
#        cluster, cleann_nn cluster test samples, poisoned_nn cluster test samples
def compute_collat(acc_before, acc_after, subpop_frac, pre_subpop, post_subpop):
    other_acc_before = (acc_before - subpop_frac*pre_subpop)/(1-subpop_frac)
    other_acc_after = (acc_after - subpop_frac*post_subpop)/(1-subpop_frac)
    return other_acc_after - other_acc_before

# indexing of kd_used and kd_targets are same
kd_targets = np.zeros((len(kd_used), 3, 2)) # dim: (len(kd filter: 28) X 3 X 2)

for j, ((i, count), conf) in enumerate(zip(kd_used, kd_sbcl_conf)): # for each cluster(filter) in ClusterMatch
    # get the indexes of test samples which have current cluster index as its cluster index
    this_inds = np.where(test_km==i)[0] 
    size = this_inds.size                 # number of test samples that belong to this cluster
    
#     pre_acc = nn.score(test_x[this_inds], test_y[this_inds]) # score of test samples belonging to current cluster given
#                                                              # clean nn
#     print(pre_acc, size, pre_acc*size)    # print accuracy score, size, and their multiplication
    for pois_ind in range(3):
        #print(i, count, conf, pois_ind)
        this_err = kd_nn_errs[i, :, pois_ind]      # pick pois_ind column of (3 x pois_ind) matrix
        clean_acc, collat, target = this_err[0], this_err[1], this_err[2]
        
        # target is poisoned acc - clean acc for identical test samples in a cluster
        kd_targets[j, pois_ind, 0] = clean_acc - target
        # compute collat by giving clean_nn test accuracy, poisoned_nn collat, frac of test samples belonging to current 
        # cluster, cleann_nn cluster test samples, poisoned_nn cluster test samples
        kd_targets[j, pois_ind, 1] = compute_collat(clean_test_acc, collat, size/test_x.shape[0], clean_acc, target)


### Features with highest Target metric for each poisoning rate

In [53]:
# # print target metric for FeatureMatch
# for i in range(len(hd_targets)):
#     for poi_ind in range(3):
#         if poi_ind == 0:
#             print("Index: %d, Poison ratio: % d, target: %f " %(i, poi_ind, hd_targets[i, poi_ind, 0]))

# sorting the error rate for FeatureMatch based on target metric

for pois_ind in range(3): # for each pois_ind/pois frac
    print("\n")
    
    # print the poison index and poison rate
    print("Pois Index: %d, Pois fraction: %f of identical training samples " %( pois_ind, pois_rates[pois_ind]))
    
    # sort the target metric scores for current poison index
    sorted_hd_targets = np.argsort(hd_targets[:, pois_ind, 0])
    
    # take the last index of sorted_hd_targets as it is the index with highest target score for current poison index
    top1_index = sorted_hd_targets[-1]
    
#     print("Hd error/used Index: ", top1_index) # print the top1_index

    # get the subclasses index, count for the feature with the highest target score using top1_index, 
    # the key is hd_target and hd_used have same indexing, so top1_index which is index in hd_targets is also the same index
    # in hd_used, then hd_used has the index and count of the subclass to which the highest target score belongs to
    subclasses_ind, count = hd_used[top1_index]
    print("Subclass Index: %d, Count: %d" % (subclasses_ind, count))
    
    subclass = subclasses[subclasses_ind] # get the subclass to which the highest score belongs to
    print([prot_col for v, prot_col in zip(subclass, prot_cols) if v > 0.5]) # print the subclass
    print(subclass)
        
    # print its target and collat values
    print("Target: %f, Collat: %f" % (hd_targets[top1_index, pois_ind, 0], hd_targets[top1_index, pois_ind, 1]))
 



Pois Index: 0, Pois fraction: 0.500000 of identical training samples 
Subclass Index: 112, Count: 28
['education_3.0', 'race_ White', 'sex_ Male']
[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1.]
Target: 0.247619, Collat: 0.833637


Pois Index: 1, Pois fraction: 1.000000 of identical training samples 
Subclass Index: 83, Count: 89
['education_7.0', 'race_ White', 'sex_ Male']
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1.]
Target: 0.205607, Collat: 0.830820


Pois Index: 2, Pois fraction: 2.000000 of identical training samples 
Subclass Index: 118, Count: 18
['education_2.0', 'race_ White', 'sex_ Male']
[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1.]
Target: 0.511111, Collat: 0.832640


### Clusters for highest Target metric for each poisoning rate

In [93]:
# # print target metric for ClusterMatch

# for i in range(len(kd_targets)):
#     for poi_ind in range(3):
#         if poi_ind == 0:
#             print("Index: %d, Poison ratio: % d, target: %f " %(i, poi_ind, kd_targets[i, poi_ind, 0]))

# sorting the error rate for ClusterMatch based on target metric

for pois_ind in range(3): # for each pois_ind/pois frac
    print("\n")
    
    # print the poison index and poison rate
    print("Pois Index: %d, Pois fraction: %f of identical training samples " %( pois_ind, pois_rates[pois_ind]))
    
    # sort the target metric scores for current poison index
    sorted_kd_targets = np.argsort(kd_targets[:, pois_ind, 0])
    
    # take the last index of sorted_kd_targets as it is the index with highest target score for current poison index
    top1_ind = sorted_kd_targets[-1]
    
#     print("kd error/used Index: ", top1_index) # print the top1_index

    # get the cluster index, count for the feature with the highest target score using top1_index, 
    # the key is kd_target and kd_used have same indexing, so top1_index which is index in kd_targets is also the same index
    # in kd_used, then kd_used has the index and count of the cluster to which the highest target score belongs to
    cluster_ind, count = kd_used[top1_ind]
    print("Cluster Index: %d, Count: %d" % (cluster_ind, count))
    
    # get the index of training samples that belong to that cluster with highest target
    train_examp_ind = np.where(train_km==cluster_ind)[0]    # has indexes of such samples
    train_examp_rand_ind = np.random.choice(train_examp_ind.shape[0], 3, replace= False) # pick five random such samples
    for i in train_examp_rand_ind:
        example = train_x[i]        # print the examples in the cluster
        print(example)
        print([prot_col for v, prot_col in zip(example, all_cols) if v > 0.5]) # print the examples in cluster
        
    # print its target and collat values
    print("Target: %f, Collat: %f" % (kd_targets[top1_ind, pois_ind, 0], kd_targets[top1_ind, pois_ind, 1]))
    
    



Pois Index: 0, Pois fraction: 0.500000 of identical training samples 
Cluster Index: 2, Count: 14
[5.000e+01 0.000e+00 1.902e+03 4.000e+01 0.000e+00 1.000e+00 0.000e+00
 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
 0.000e+00 0.000e+00 0.000e+00 1.000e+00 0.000e+00 0.000e+00 0.000e+00
 1.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
 1.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00
 0.000e+00 0.000e+00 1.000e+00 0.000e+00 0.000e+00 0.000e+00 1.000e+00
 0.000e+00]
['age', 'capital-loss', 'hours-per-week', 'workclass_ Local-gov', 'education_14.0', 'marital_ Married-civ-spouse', 'occupation_ Prof-specialty', 'relationship_ Wife', 'race_ White']
[57.  0.  0. 48.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0. 