In [1]:
import os
import random
import time
from copy import deepcopy
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from platypus import Hypervolume, display, calculate, CMAES, MOEAD, NSGAII, NSGAIII, SPEA2, IBEA, Problem, Real
from sklearn.model_selection import train_test_split

In [2]:
from load_data import *
from bias_functions import *
from utility_functions import *

In [3]:
def getDataset(setSelection):
    """Load the selected dataset and append a constant column of ones to X.

    Parameters
    ----------
    setSelection : str
        One of 'toy', 'adult', 'bank', 'german' or 'mortgage'.

    Returns
    -------
    X, y
        X is the matrix returned by the matching loader with one extra trailing
        column of ones (so a linear model on X is affine in the original
        features); y is returned exactly as the loader produced it.

    Raises
    ------
    ValueError
        If setSelection is not one of the recognised dataset names.
    """
    if setSelection == 'toy':
        X,y = generate_toy_data(1000,200,2)
    elif setSelection == 'adult':
        protectedAttributes={'race':'White','gender':'Male'}
        X,y = load_adult(protectedAttributes=protectedAttributes)
    elif setSelection == 'bank':
        X,y = load_bank(smaller=False)
    elif setSelection == 'german':
        X,y = load_german()
    elif setSelection == 'mortgage':
        protectedCategoricalAttributes={'applicant_ethnicity_name':'Not Hispanic or Latino',
                                'applicant_race_name_1':'White','applicant_sex_name':'Male'}
        protectedNumericalAttributes=['minority_population']
        X,y = load_mortgage(protectedCategoricalAttributes=protectedCategoricalAttributes,
                            protectedNumericalAttributes=protectedNumericalAttributes)
    else:
        # Fail here with a clear message; previously execution fell through to
        # the hstack below with X undefined and raised UnboundLocalError.
        raise ValueError('dataset not recognised: {!r}'.format(setSelection))

    X = np.hstack([X, np.ones((X.shape[0],1))]) ## add ones to solve for affine functions

    return X,y

In [4]:
# new problems

# problem 4: all datasets and {age,gender,race} [change the dataset in the main loop; change the sensitive attribute in the _train and _test functions]
# objectives: accuracy, DI, EO, DM(OMR)

def problem4_base(w,X,y,sensitiveAttributeIndex):
    """Evaluate the four problem-4 objectives for weights w on the data (X, y).

    Returns a 4-tuple: the error rate, followed by the absolute difference
    between element 0 and element 1 of what each fairness helper returns
    (disparate impact, equal opportunity, disparate mistreatment with
    type='OMR'), all computed for column `sensitiveAttributeIndex`.
    """
    def absoluteGap(pair):
        # magnitude of the difference between the first two entries of the helper result
        return np.abs(pair[0] - pair[1])

    # Helpers are called in the same order as the objectives are reported.
    error = errorRate(w, X, y)
    disparateImpact = differenceDisparateImpactModel(
        w, X, sensitiveAttributeIndex=sensitiveAttributeIndex)
    equalOpportunity = differenceEqualOpportunity(
        w, X, y, sensitiveAttributeIndex=sensitiveAttributeIndex)
    disparateMistreatment = differenceDisparateMistreatment(
        w, X, y, sensitiveAttributeIndex=sensitiveAttributeIndex, type='OMR')

    return (error,
            absoluteGap(disparateImpact),
            absoluteGap(equalOpportunity),
            absoluteGap(disparateMistreatment))

def problem4_train(w):
    """Problem-4 objectives of w on the current training split.

    Reads the module-level `trainxs` / `trainys` and uses column 0 as the
    sensitive attribute.
    """
    trainObjectives = problem4_base(w, trainxs, trainys, sensitiveAttributeIndex=0)
    return trainObjectives
def problem4_test(w):
    """Problem-4 objectives of w on the current test split.

    Reads the module-level `testxs` / `testys` and uses column 0 as the
    sensitive attribute.
    """
    testObjectives = problem4_base(w, testxs, testys, sensitiveAttributeIndex=0)
    return testObjectives

In [5]:
targetValue = 0.01

# One row per combination of capped measures: a 1 means that column is set to
# targetValue, a 0 leaves it at np.inf.
# (presumably one column per fairness objective - confirm against
# minErrorRateSet_given_targetMeasuresArray)
targetMeasureArray = np.where(
    np.array([[0, 0, 0],
              [1, 0, 0],
              [0, 1, 0],
              [0, 0, 1],
              [1, 1, 0],
              [1, 0, 1],
              [0, 1, 1],
              [1, 1, 1]], dtype=bool),
    targetValue,
    np.inf,
)

## Sensitive attributes list:

toy: 2 ; variables = 4

adult: 8 - race, 9 - gender ;  variables = 15 ; 45222 samples

bank: 0 - age ; variables = 21 ; 41188 samples

german: 6 - gender, 9 - age, 14 - foreign worker ; variables = 25 ; 1000 samples

mortgage: 12 - ethnicity, 14 - race, 16 - gender, 24 - minority population ; variables = 30 ; 200000 samples

In [6]:
# Experiment driver: repeatedly split the data, optimise the training problem,
# re-evaluate the resulting solutions on the held-out split, and collect
# hypervolume / extreme-point / average-point / error-rate statistics per run.
now = datetime.now()

# these lines define the problem, test problem, dataset, algorithm and number of sampling runs
noOfVariables = 21
noOfObjectives = 4

problem = Problem(noOfVariables,noOfObjectives)
problem.function = problem4_train

# testProblem is only used as a holder for the test objective function below
testProblem = Problem(noOfVariables,noOfObjectives)
testProblem.function = problem4_test

dataset = 'bank'

noOfSamplingRuns = 20

X,y = getDataset(dataset)

problem.types[:] = Real(-5,5)

hypervolumeArray = np.zeros((noOfSamplingRuns, 2)) # col 0 for training results (hypervolume), col 1 for test results
extremesArray = np.zeros((2, noOfObjectives, noOfObjectives, noOfSamplingRuns)) # the first dimension is for train/test
averagePointArray = np.zeros((2, noOfSamplingRuns, noOfObjectives)) # the first dimension is for train/test. This array holds the average objective point for each run

overallErrorRateSet = np.zeros((2, noOfSamplingRuns, len(targetMeasureArray)))

# these will be used to store the algorithm that returns the largest hypervolume
bestTrainAlgorithm = None
bestTrainHypervolume = 0
bestTestAlgorithm = None
bestTestHypervolume = 0
totalTrainTime = 0
totalTestTime = 0

for run in range(noOfSamplingRuns):

    # generate new train/test split for each run
    # NOTE(review): no random_state is passed, so the splits (and therefore all
    # results below) differ on every execution of this cell.
    trainxs, testxs, trainys, testys  = train_test_split(X,y,train_size=0.8)

    # training run
    startTime = time.perf_counter()

    trainAlgorithm = CMAES(problem)
    # trainAlgorithm = SPEA2(problem,population_size=500)
    trainAlgorithm.run(10000)

    trainHyp = Hypervolume(minimum=[0]*noOfObjectives,maximum=[1]*noOfObjectives)
    trainHypResult = trainHyp(trainAlgorithm.result)
    hypervolumeArray[run, 0] = trainHypResult

    singleRunExtremesArray, averagePoint = findExtremes_averagePoint(trainAlgorithm)
    extremesArray[0,:,:,run] = singleRunExtremesArray
    averagePointArray[0,run,:] = averagePoint

    # column 0 is stored as the error rate; column 1 is used further down as an
    # index into the algorithm's result list
    minErrorRateSet = minErrorRateSet_given_targetMeasuresArray(trainAlgorithm, targetMeasureArray)
    overallErrorRateSet[0,run,:] = minErrorRateSet[:,0]

    endTime = time.perf_counter()
    trainTime = endTime - startTime
    totalTrainTime += trainTime

    print("{}: training took {:.2f} seconds w/ hypervolume {:2f}".format(run, trainTime, trainHypResult))


    # test run: copy the trained algorithm and overwrite each solution's
    # objectives with the values obtained on the held-out split
    testAlgorithm = deepcopy(trainAlgorithm)

    startTime = time.perf_counter()

    for result in testAlgorithm.result:

        w=result.variables
        objectivesResult = testProblem.function(w)

        # copy objective results individually to the testAlgorithm object
        for i, objectiveValue in enumerate(objectivesResult):
            result.objectives[i] = objectiveValue

    testHyp = Hypervolume(minimum=[0]*noOfObjectives,maximum=[1]*noOfObjectives)
    testHypResult = testHyp(testAlgorithm.result)
    hypervolumeArray[run, 1] = testHypResult

    singleRunExtremesArray, averagePoint = findExtremes_averagePoint(testAlgorithm)
    extremesArray[1,:,:,run] = singleRunExtremesArray
    averagePointArray[1,run,:] = averagePoint

    for targetMeasure in range(len(targetMeasureArray)):
        if minErrorRateSet[targetMeasure,0] == 1.0:
            # an error rate of exactly 1.0 on the training side is carried over unchanged
            overallErrorRateSet[1,run,targetMeasure] = 1.0
        else:
            # np.int was removed from NumPy (1.24); the builtin int is the replacement
            solutionIndex = int(minErrorRateSet[targetMeasure,1])
            overallErrorRateSet[1,run,targetMeasure] = testAlgorithm.result[solutionIndex].objectives[0]


    endTime = time.perf_counter()
    testTime = endTime - startTime
    totalTestTime += testTime

    print("{}: testing took {:.2f} seconds w/ hypervolume {:2f}".format(run, testTime, testHypResult))

    if trainHypResult > bestTrainHypervolume:
        bestTrainHypervolume = trainHypResult
        bestTrainAlgorithm = deepcopy(trainAlgorithm)

        bestTestHypervolume = testHypResult
        bestTestAlgorithm = deepcopy(testAlgorithm)

        bestData = [trainxs, testxs, trainys, testys]
        # Build the file name from real names instead of slicing object reprs,
        # whose length depends on how the platform prints memory addresses.
        bestDataSavePath = ("saved_data/" + now.strftime("%Y%m%d_%H%M")
                            + "_" + problem.function.__name__[:8]
                            + "_" + dataset
                            + "_" + type(trainAlgorithm).__name__
                            + "_data")
        # The four pieces have different shapes; pack them into an explicit
        # 1-D object array, because recent NumPy refuses to build a ragged
        # array implicitly. Loading the file requires allow_pickle=True.
        bestDataToSave = np.empty(len(bestData), dtype=object)
        for position, piece in enumerate(bestData):
            bestDataToSave[position] = piece
        # make sure the output folder exists so a long run is not lost at save time
        os.makedirs("saved_data", exist_ok=True)
        np.save(bestDataSavePath, bestDataToSave)

# resultsSummary:
# column 0: average hypervolume, column 1: hypervolume std, column 2: average point std (Euclidean distance), column 3: average time
# row 0: training data, row 1: test data
resultsSummary = np.zeros((2,4))
resultsMean = np.mean(hypervolumeArray, axis=0)
resultsSummary[:,0] = resultsMean
resultsSummary[:,1] = np.std(hypervolumeArray, axis=0) # population std (ddof=0), as before
averagePointMeans = np.mean(averagePointArray,axis=1)
# std over runs of each run's Euclidean distance from the mean average-point
resultsSummary[0,2] = np.std(np.sqrt(np.sum((averagePointArray[0,:,:]-averagePointMeans[0,:])**2,axis=1)))
resultsSummary[1,2] = np.std(np.sqrt(np.sum((averagePointArray[1,:,:]-averagePointMeans[1,:])**2,axis=1)))
resultsSummary[0,3] = totalTrainTime/noOfSamplingRuns
resultsSummary[1,3] = totalTestTime/noOfSamplingRuns
print(resultsSummary)

(41188, 21)
(41188, 20)
age: 0
A smaller version of the dataset is loaded...
0: training took 219.58 seconds w/ hypervolume 0.978955
0: testing took 105.34 seconds w/ hypervolume 0.965577


  return array(a, dtype, copy=False, order=order, subok=True)


1: training took 511.51 seconds w/ hypervolume 0.980903
1: testing took 117.84 seconds w/ hypervolume 0.855352
2: training took 60.10 seconds w/ hypervolume 0.978075


  p_yHatEqualY_z1 = n_yHatEqualY_z1 / n_z1


2: testing took 0.42 seconds w/ hypervolume 0.000000
3: training took 732.22 seconds w/ hypervolume 0.977082
3: testing took 761.90 seconds w/ hypervolume 0.962118
4: training took 112.51 seconds w/ hypervolume 0.978329
4: testing took 316.98 seconds w/ hypervolume 0.963974
5: training took 92.09 seconds w/ hypervolume 0.979081
5: testing took 55.02 seconds w/ hypervolume 0.913143
6: training took 77.53 seconds w/ hypervolume 0.979751
6: testing took 0.39 seconds w/ hypervolume 0.000000
7: training took 66.13 seconds w/ hypervolume 0.977881
7: testing took 0.42 seconds w/ hypervolume 0.000000
8: training took 1611.98 seconds w/ hypervolume 0.978296
8: testing took 441.72 seconds w/ hypervolume 0.692566
9: training took 856.73 seconds w/ hypervolume 0.979538
9: testing took 1.08 seconds w/ hypervolume 0.000000
10: training took 1191.88 seconds w/ hypervolume 0.979410
10: testing took 617.08 seconds w/ hypervolume 0.864981
11: training took 73.04 seconds w/ hypervolume 0.979334
11: testi