In [1]:
import yaml

import gc

from approx_thresh_general import tpr, fpr, precision

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

from pipeline import FairDataset, FairPipeline, accuracy, f1

import pandas as pd

import warnings
warnings.filterwarnings("ignore")

# Classifiers handed to the pipeline, keyed by a short name. Only k-NN is
# active; uncomment an entry to include that model as well.
my_classifiers = {
    # 'logistic_regression': LogisticRegression(),
    # 'random_forest': RandomForestClassifier(),
    # 'gradient_boosting': GradientBoostingClassifier(),
    # 'svc': SVC(probability=True),
    'knn': KNeighborsClassifier(),
    # 'mlp': MLPClassifier()
}

# Metric callables passed to FairPipeline as `metric_functions`.
metrics_functions = {
    'tpr': tpr,
    'fpr': fpr,
    'precision': precision,
}

# Metric callables passed to FairPipeline as `metrics`: the three above
# (same order) followed by accuracy and F1.
metrics_dict = {
    **metrics_functions,
    'accuracy': accuracy,
    'f1': f1,
}

pipeline = FairPipeline(
    classifiers=my_classifiers,
    classifier_config_path='configs/classifier_config.yml',
    metrics=metrics_dict,
    metric_functions=metrics_functions,
    lambda_=0.8,
)

# Feature matrix first, then the label file collapsed with .squeeze().
X, y = (
    pd.read_csv(f'matrices/ACSEmployment/Xs.csv'),
    pd.read_csv(f'matrices/ACSEmployment/ys.csv').squeeze(),
)

# 'RAC1P_recoded' is passed as the third FairDataset argument
# (presumably the sensitive-attribute column -- confirm against FairDataset).
acs_dataset = FairDataset(X, y, 'RAC1P_recoded')

In [2]:
def load_config(config_path):
    """Read a YAML configuration file and return its parsed contents.

    Args:
        config_path: Path of the YAML file to read.

    Returns:
        The object produced by ``yaml.safe_load`` for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Pin the encoding so parsing does not depend on the platform's locale
    # default (YAML streams are normally UTF-8).
    with open(config_path, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)

CONFIG_PATH = 'configs/test_config.yml'

# Datasets whose matrices are stored as Xs.csv / ys.csv; every other dataset
# uses X.csv / y.csv.
ACS_DATASETS = ('ACSEmployment', 'ACSIncome', 'ACSMobility', 'ACSPublicCoverage', 'ACSTravelTime')

config = load_config(CONFIG_PATH)
# Mapping of dataset name -> iterable of sensitive attribute names.
datasets = config['datasets']
classifier_config_path = 'configs/classifier_config.yml'

# set to True to estimate runtime
ESTIMATE_RUNTIME = False


def _load_fair_dataset(dataset_name, sensitive_attrs):
    """Load one dataset's CSV matrices, drop incomplete rows, and wrap them.

    Args:
        dataset_name: Sub-directory of ``matrices/`` holding the CSV files.
        sensitive_attrs: Forwarded unchanged as FairDataset's third argument.

    Returns:
        A FairDataset built from the cleaned features and matching labels.
    """
    suffix = 's' if dataset_name in ACS_DATASETS else ''
    X = pd.read_csv(f'matrices/{dataset_name}/X{suffix}.csv')
    y = pd.read_csv(f'matrices/{dataset_name}/y{suffix}.csv').squeeze()

    # remove any rows that have null or nan, and keep only the labels whose
    # index survived (explicit label-based selection)
    X = X.dropna()
    y = y.loc[X.index]

    # NOTE(review): the whole attribute collection is passed here, as in the
    # original code -- confirm FairDataset accepts that.
    return FairDataset(X, y, sensitive_attrs)


def _new_pipeline():
    """Build a fresh FairPipeline with the settings shared by both loops."""
    return FairPipeline(classifiers=my_classifiers,
                        classifier_config_path=classifier_config_path,
                        metrics=metrics_dict,
                        metric_functions=metrics_functions,
                        lambda_=0.8, max_error=0.01, max_total_combinations=1000)


# here we estimate the runtime for the pipeline
if ESTIMATE_RUNTIME:
    total_runtime = 0
    for dataset_name, sensitive_attrs in datasets.items():
        print(f"Estimating runtime for dataset: {dataset_name}")
        dataset = _load_fair_dataset(dataset_name, sensitive_attrs)

        for sensitive_attr in sensitive_attrs:
            pipeline = _new_pipeline()
            total_runtime += pipeline.estimate_runtime(dataset, sensitive_attr)

        # release the dataset before loading the next one
        del dataset
        gc.collect()
        print()

    print(f"Total estimated runtime for all datasets and sensitive attributes: {total_runtime:.2f} seconds")

proceed = input("Do you want to proceed with the full pipeline run? (yes/no): ").strip().lower()

if proceed == 'yes':
    print("Running the full pipeline...")
    # Collect per-run frames and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    result_frames = []
    for dataset_name, sensitive_attrs in datasets.items():
        print(f"Running pipeline for dataset: {dataset_name}")
        dataset = _load_fair_dataset(dataset_name, sensitive_attrs)

        for sensitive_attr in sensitive_attrs:
            pipeline = _new_pipeline()
            pipeline.tune_and_evaluate(dataset, dataset_name, sensitive_attr)
            # .assign returns a tagged copy, leaving pipeline.results_df untouched
            results = pipeline.results_df.assign(sensitive_attr=sensitive_attr,
                                                 dataset=dataset_name)
            result_frames.append(results)

        # this avoids memory issues
        del dataset
        gc.collect()

    # pd.concat raises on an empty list, so fall back to an empty frame
    all_results = (pd.concat(result_frames, ignore_index=True)
                   if result_frames else pd.DataFrame())

    print("Pipeline run completed.")

    # dump all results to pickle dataframe (only when the run actually happened;
    # previously an aborted run hit a NameError or re-pickled stale results)
    all_results.to_pickle('all_results.pkl')
else:
    print("Pipeline run aborted.")


Running the full pipeline...
Running pipeline for dataset: ACSEmployment
Overall metrics for knn (original): {'tpr': 0.8596976793698106, 'fpr': 0.2247670807453416, 'precision': 0.7771362586605081, 'accuracy': 0.8155142654076556, 'f1': 0.8163347821692105}
Hyperparameters: {'n_neighbors': 20, 'weights': 'uniform'}
Metrics for knn (original) on 1: {'tpr': 0.8596648713345302, 'fpr': 0.2158899494665918, 'precision': 0.7888522789676002, 'accuracy': 0.820683661645423, 'f1': 0.822737686139748}
Hyperparameters: {'n_neighbors': 20, 'weights': 'uniform'}
Metrics for knn (original) on 2: {'tpr': 0.8485401459854015, 'fpr': 0.23529411764705882, 'precision': 0.7536466774716369, 'accuracy': 0.8031825795644891, 'f1': 0.7982832618025751}
Hyperparameters: {'n_neighbors': 20, 'weights': 'uniform'}
Metrics for knn (original) on 3: {'tpr': 0.8624708624708625, 'fpr': 0.28054298642533937, 'precision': 0.7489878542510121, 'accuracy': 0.7898966704936854, 'f1': 0.8017334777898159}
Hyperparameters: {'n_neighbors'

Threshold Combinations: 100%|██████████| 2401/2401 [00:10<00:00, 235.26it/s]


Best objective value: 0.584964341625366
Best thresholds: {1: 0.3333333333333333, 2: 0.3333333333333333, 3: 0.3333333333333333, 4: 0.3333333333333333}
Overall metrics for knn (fair): {'tpr': 0.9584841388120077, 'fpr': 0.31288819875776397, 'precision': 0.7363428197579326, 'accuracy': 0.8165295969133922, 'f1': 0.8328554250300619}
Hyperparameters: {'n_neighbors': 20, 'weights': 'uniform'}
Metrics for knn (fair) on 1: {'tpr': 0.9539198084979055, 'fpr': 0.3074115665356541, 'precision': 0.7443380807844968, 'accuracy': 0.819090382387022, 'f1': 0.8361967213114754}
Hyperparameters: {'n_neighbors': 20, 'weights': 'uniform'}
Metrics for knn (fair) on 2: {'tpr': 0.9635036496350365, 'fpr': 0.3219814241486068, 'precision': 0.717391304347826, 'accuracy': 0.8090452261306532, 'f1': 0.822429906542056}
Hyperparameters: {'n_neighbors': 20, 'weights': 'uniform'}
Metrics for knn (fair) on 3: {'tpr': 0.9766899766899767, 'fpr': 0.3416289592760181, 'precision': 0.7350877192982456, 'accuracy': 0.8151549942594719

Threshold Combinations: 100%|██████████| 1089/1089 [00:05<00:00, 195.18it/s]


Best objective value: 0.2220494342777255
Best thresholds: {1.0: 0.625, 2.0: 0.625}
Overall metrics for knn (fair): {'tpr': 0.6961890568447946, 'fpr': 0.07317546583850931, 'precision': 0.8966273649574993, 'accuracy': 0.8168341963651132, 'f1': 0.783796740172579}
Hyperparameters: {'n_neighbors': 20, 'weights': 'uniform'}
Metrics for knn (fair) on 1.0: {'tpr': 0.7072538860103627, 'fpr': 0.06806506849315068, 'precision': 0.9115191986644408, 'accuracy': 0.8200773860705073, 'f1': 0.7964989059080962}
Hyperparameters: {'n_neighbors': 20, 'weights': 'uniform'}
Metrics for knn (fair) on 2.0: {'tpr': 0.6854262914741706, 'fpr': 0.07741477272727272, 'precision': 0.8821621621621621, 'accuracy': 0.813931114104291, 'f1': 0.7714488300638147}
Hyperparameters: {'n_neighbors': 20, 'weights': 'uniform'}
Running pipeline for dataset: students
Overall metrics for knn (original): {'tpr': 0.8608695652173913, 'fpr': 0.011056511056511056, 'precision': 0.9565217391304348, 'accuracy': 0.960727969348659, 'f1': 0.906

Threshold Combinations: 100%|██████████| 1089/1089 [00:05<00:00, 201.09it/s]


Best objective value: 0.11300744210026964
Best thresholds: {0: 0.40625, 1: 0.40625}
Overall metrics for knn (fair): {'tpr': 0.8608695652173913, 'fpr': 0.011056511056511056, 'precision': 0.9565217391304348, 'accuracy': 0.960727969348659, 'f1': 0.9061784897025171}
Hyperparameters: {'n_neighbors': 5, 'weights': 'uniform'}
Metrics for knn (fair) on 0: {'tpr': 0.8571428571428571, 'fpr': 0.005747126436781609, 'precision': 0.9782608695652174, 'accuracy': 0.9624724061810155, 'f1': 0.9137055837563451}
Hyperparameters: {'n_neighbors': 5, 'weights': 'uniform'}
Metrics for knn (fair) on 1: {'tpr': 0.864, 'fpr': 0.015021459227467811, 'precision': 0.9391304347826087, 'accuracy': 0.9593908629441624, 'f1': 0.9}
Hyperparameters: {'n_neighbors': 5, 'weights': 'uniform'}
Pipeline run completed.
