# Imports

In [2]:
from folktables import folktables
from folktables import ACSDataSource
import numpy as np
from custom_functions import *
from aif360.datasets import StandardDataset
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from aif360.metrics import ClassificationMetric
import pandas as pd
import joblib
from aif360.algorithms.preprocessing import Reweighing

# Data Setup Code (from Spec Sheet)

In [3]:
# (Age) must be greater than 16 and less than 90,
# and (Person weight) must be greater than or equal to 1
def employment_filter(data):
    """Filters for the employment prediction task"""
    df = data
    df = df[df['AGEP'] > 16]
    df = df[df['AGEP'] < 90]
    df = df[df['PWGTP'] >= 1]
    return df

ACSEmployment = folktables.BasicProblem(
    features=[
        'AGEP',  # age; for range of values of features please check Appendix B.4 of Retiring Adult: New Datasets for Fair Machine Learning NeurIPS 2021 paper
        'SCHL',  # educational attainment
        'MAR',   # marital status
        'RELP',  # relationship
        'DIS',   # disability recode
        'ESP',   # employment status of parents
        'CIT',   # citizenship status
        'MIG',   # mobility status (lived here 1 year ago)
        'MIL',   # military service
        'ANC',   # ancestry recode
        'NATIVITY',  # nativity
        'DEAR',   # hearing difficulty
        'DEYE',   # vision difficulty
        'DREM',   # cognitive difficulty
        'SEX',    # sex
        'RAC1P',  # recoded detailed race code
        'GCL',    # grandparents living with grandchildren
    ],
    target='ESR',  # employment status recode
    target_transform=lambda x: x == 1,
    group='DIS',
    preprocess=employment_filter,
    postprocess=lambda x: np.nan_to_num(x, -1),
)

data_source = ACSDataSource(survey_year='2018', horizon='1-Year', survey='person')
acs_data = data_source.get_data(states=["FL"], download=True)  # data for Florida state

features, label, group = ACSEmployment.df_to_numpy(acs_data)

data = pd.DataFrame(features, columns=ACSEmployment.features)
data['label'] = label

favorable_classes = [True]
protected_attribute_names = [ACSEmployment.group]
privileged_classes = np.array([[1]])

data_for_aif = StandardDataset(
    data,
    label_name='label',
    favorable_classes=favorable_classes,
    protected_attribute_names=protected_attribute_names,
    privileged_classes=privileged_classes
)

privileged_groups = [{'DIS': 1}]
unprivileged_groups = [{'DIS': 2}]

  df.loc[pos, label_name] = favorable_label


# Task 1

In [4]:
# Split the dataset into train-val and test sets
train_and_val_data_2, test_data_1 = data_for_aif.split([0.7], shuffle=True, seed=0)

# Apply grid search and randomised train-val splits, saving all results and exporting best models
task_1_results = grid_search_models(train_and_val_data_2, custom_criterion_style='sum_of_logs')

reweight = False
Training model with C = 1e-06 and solver = newton-cg


  results = pd.concat([results, new_result], ignore_index=True)


Training model with C = 1e-06 and solver = lbfgs
Training model with C = 1e-06 and solver = liblinear
Training model with C = 1e-06 and solver = sag
Training model with C = 1e-06 and solver = saga
Training model with C = 1e-05 and solver = newton-cg
Training model with C = 1e-05 and solver = lbfgs
Training model with C = 1e-05 and solver = liblinear
Training model with C = 1e-05 and solver = sag
Training model with C = 1e-05 and solver = saga
Training model with C = 0.0001 and solver = newton-cg
Training model with C = 0.0001 and solver = lbfgs
Training model with C = 0.0001 and solver = liblinear
Training model with C = 0.0001 and solver = sag
Training model with C = 0.0001 and solver = saga
Training model with C = 0.001 and solver = newton-cg
Training model with C = 0.001 and solver = lbfgs
Training model with C = 0.001 and solver = liblinear
Training model with C = 0.001 and solver = sag
Training model with C = 0.001 and solver = saga
Training model with C = 0.01 and solver = newton

### Training and Validation Results

In [5]:
# Find the best model
best_accuracy_1, best_eod_1, best_nonzero_eod_1, best_cc_1 = find_best_results(task_1_results)

# Prine results
print(f"Best accuracy:\n", best_accuracy_1, f'\n')
print(f"Best EOD:\n", best_eod_1, f'\n')
print(f"Lowest non-zero EOD:\n", best_nonzero_eod_1, f'\n')
print(f"Best criterion:\n", best_cc_1, f'\n')

Best accuracy:
        C Solver  Mean accuracy  Mean EOD  Mean custom criterion
21  0.01  lbfgs       0.751253  0.615774              -1.243065 

Best EOD:
           C     Solver  Mean accuracy  Mean EOD  Mean custom criterion
0  0.000001  newton-cg       0.542484  0.019263              -0.631057
1  0.000001      lbfgs       0.542262  0.019263              -0.631466
4  0.000001       saga       0.542390  0.019263              -0.631230 

Lowest non-zero EOD:
           C     Solver  Mean accuracy  Mean EOD  Mean custom criterion
0  0.000001  newton-cg       0.542484  0.019263              -0.631057
1  0.000001      lbfgs       0.542262  0.019263              -0.631466
4  0.000001       saga       0.542390  0.019263              -0.631230 

Best criterion:
           C     Solver  Mean accuracy  Mean EOD  Mean custom criterion
0  0.000001  newton-cg       0.542484  0.019263              -0.631057 



### Performance on Held-out Test Set

In [6]:
# Metrics for both the best accuracy and best EOD models
best_accuracy_accuracy_1, best_accuracy_eod_1, best_accuracy_cc_1 = test_model(test_data_1, 'std_model_accuracy.joblib')
best_eod_accuracy_1, best_eod_eod_1, best_eod_cc_1 = test_model(test_data_1, 'std_model_eod.joblib')
best_cc_accuracy_1, best_cc_eod_1, best_cc_cc_1 = test_model(test_data_1, 'std_model_cc.joblib')

# Print results
print("Most accurate model:")
print(f"Accuracy: {best_accuracy_accuracy_1}")
print(f"EOD: {best_accuracy_eod_1}", f'\n')
print(f"Custom criterion: {best_accuracy_cc_1}")

print("Best EOD model:")
print(f"Accuracy: {best_eod_accuracy_1}")
print(f"EOD: {best_eod_eod_1}")
print(f"Custom criterion: {best_eod_cc_1}")

print("Best CC model:")
print(f"Accuracy: {best_cc_accuracy_1}")
print(f"EOD: {best_cc_eod_1}", f'\n')
print(f"Custom criterion: {best_cc_cc_1}")

Most accurate model:
Accuracy: 0.7523310487727127
EOD: 0.6736529561484942 

Custom criterion: 0.6725043999780665
Best EOD model:
Accuracy: 0.541819413452343
EOD: 0.01934235976789167
Custom criterion: 1.2552576841394483
Best CC model:
Accuracy: 0.525880618425247
EOD: 0.0 

Custom criterion: 1.2765504248353203


# Task 2

In [7]:
# Split the dataset into train-val and test sets
train_and_val_data_2, test_data_2 = data_for_aif.split([0.7], shuffle=True, seed=0)

# Apply grid search and randomised train-val splits, saving all results and exporting best models
task_2_results = grid_search_models(train_and_val_data_2, reweight=True)

reweight = True
Training model with C = 1e-06 and solver = newton-cg


  results = pd.concat([results, new_result], ignore_index=True)


Training model with C = 1e-06 and solver = lbfgs
Training model with C = 1e-06 and solver = liblinear
Training model with C = 1e-06 and solver = sag
Training model with C = 1e-06 and solver = saga
Training model with C = 1e-05 and solver = newton-cg
Training model with C = 1e-05 and solver = lbfgs
Training model with C = 1e-05 and solver = liblinear
Training model with C = 1e-05 and solver = sag
Training model with C = 1e-05 and solver = saga
Training model with C = 0.0001 and solver = newton-cg
Training model with C = 0.0001 and solver = lbfgs
Training model with C = 0.0001 and solver = liblinear
Training model with C = 0.0001 and solver = sag
Training model with C = 0.0001 and solver = saga
Training model with C = 0.001 and solver = newton-cg
Training model with C = 0.001 and solver = lbfgs
Training model with C = 0.001 and solver = liblinear
Training model with C = 0.001 and solver = sag
Training model with C = 0.001 and solver = saga
Training model with C = 0.01 and solver = newton

### Training and Validation Results

In [8]:
# Find the best results
best_accuracy_2, best_eod_2, best_nonzero_eod_2, best_cc_2 = find_best_results(task_2_results)

# Print results analysis
print(f"Best accuracy:\n", best_accuracy_2, f'\n')
print(f"Best EOD:\n", best_eod_2, f'\n')
print(f"Lowest non-zero EOD:\n", best_nonzero_eod_2, f'\n')
print(f"Best criterion:\n", best_cc_2, f'\n')

Best accuracy:
         C     Solver  Mean accuracy  Mean EOD  Mean custom criterion
15  0.001  newton-cg        0.72007 -0.018474               1.482009
19  0.001       saga        0.72007 -0.018457               1.482043 

Best EOD:
        C Solver  Mean accuracy  Mean EOD  Mean custom criterion
24  0.01   saga       0.719925 -0.022143                1.47458 

Lowest non-zero EOD:
         C Solver  Mean accuracy  Mean EOD  Mean custom criterion
18  0.001    sag       0.720036 -0.018224               1.482447 

Best criterion:
         C Solver  Mean accuracy  Mean EOD  Mean custom criterion
18  0.001    sag       0.720036 -0.018224               1.482447 



### Performance on Held-out Test Set

In [9]:
# Metrics for both the best accuracy and best EOD models
best_accuracy_accuracy_2, best_accuracy_eod_2, best_accuracy_cc_2 = test_model(test_data_2, 'fair_model_accuracy.joblib')
best_eod_accuracy_2, best_eod_eod_2, best_eod_cc_2 = test_model(test_data_2, 'fair_model_eod.joblib')
best_cc_accuracy_2, best_cc_eod_2, best_cc_cc_2 = test_model(test_data_2, 'fair_model_cc.joblib')

# Print results
print("Most accurate model:")
print(f"Accuracy: {best_accuracy_accuracy_2}")
print(f"EOD: {best_accuracy_eod_2}", f'\n')
print(f"Custom criterion: {best_accuracy_cc_2}")

print("Best EOD model:")
print(f"Accuracy: {best_eod_accuracy_2}")
print(f"EOD: {best_eod_eod_2}")
print(f"Custom criterion: {best_eod_cc_2}")

print("Best CC model:")
print(f"Accuracy: {best_cc_accuracy_2}")
print(f"EOD: {best_cc_eod_2}", f'\n')
print(f"Custom criterion: {best_cc_cc_2}")
# Print results
print("Most accurate model:")
print(f"Accuracy: {best_accuracy_accuracy_2}")
print(f"EOD: {best_accuracy_eod_2}", f'\n')
print("Best EOD model:")
print(f"Accuracy: {best_eod_accuracy_2}")
print(f"EOD: {best_eod_eod_2}")

Most accurate model:
Accuracy: 0.7197760599298693
EOD: 0.004663503874752228 

Custom criterion: 1.5087723169670522
Best EOD model:
Accuracy: 0.525880618425247
EOD: 0.0
Custom criterion: 1.2765504248353203
Best CC model:
Accuracy: 0.7209316225693337
EOD: 0.03690541551817195 

Custom criterion: 1.4472935830786773
Most accurate model:
Accuracy: 0.7197760599298693
EOD: 0.004663503874752228 

Best EOD model:
Accuracy: 0.525880618425247
EOD: 0.0


# Task 3