In [1]:
import pandas as pd
from tqdm import tqdm
from experiments import Experiment
from folktables import ACSDataSource, ACSEmployment, ACSIncomePovertyRatio, ACSHealthInsurance

In [3]:
# Load the 2018 1-Year ACS person-level survey through folktables; `root_dir`
# is the directory handed to the data source.
root_dir = 'data'
data_source = ACSDataSource(
    survey_year='2018',
    horizon='1-Year',
    survey='person',
    root_dir=root_dir,
)
# No state filter is passed here; download=True lets folktables fetch the data.
acs_data = data_source.get_data(download=True)

Downloading data for 2018 1-Year person survey for AL...
Downloading data for 2018 1-Year person survey for AK...
Downloading data for 2018 1-Year person survey for AZ...
Downloading data for 2018 1-Year person survey for AR...
Downloading data for 2018 1-Year person survey for CA...
Downloading data for 2018 1-Year person survey for CO...
Downloading data for 2018 1-Year person survey for CT...
Downloading data for 2018 1-Year person survey for DE...
Downloading data for 2018 1-Year person survey for FL...
Downloading data for 2018 1-Year person survey for GA...
Downloading data for 2018 1-Year person survey for HI...
Downloading data for 2018 1-Year person survey for ID...
Downloading data for 2018 1-Year person survey for IL...
Downloading data for 2018 1-Year person survey for IN...
Downloading data for 2018 1-Year person survey for IA...
Downloading data for 2018 1-Year person survey for KS...
Downloading data for 2018 1-Year person survey for KY...
Downloading data for 2018 1-Yea

In [4]:
# Experiment configuration consumed by the Experiment(...) cells further down.
protected_attribute = "race"
task_types = [
    "employment",
    "income_poverty",
    "health_insurance",
]
# Only the logistic model is enabled in this run. The wider candidate list that
# used to be here was: ["logistic", "gbm", "nn", "svm", "tree"].
model_types = ["logistic"]
# Train / test sizes passed to Experiment; together they total 3,236,107 rows
# (roughly an 80/20 split).
n_train = 2_588_885
n_test = 647_222

In [5]:
# Run each folktables task definition over the raw ACS frame. Every result is
# later indexed as [0] (features), [1] (task label) and [2] (race code).
employment, income_poverty, health_insurance = (
    problem.df_to_numpy(acs_data)
    for problem in (ACSEmployment, ACSIncomePovertyRatio, ACSHealthInsurance)
)

In [6]:
def build_task_frame(arrays, label_name, group_name="race", privileged_code=1):
    """Assemble one task's arrays into a single DataFrame.

    Replaces three copy-pasted stanzas that differed only in the variable
    and column names.

    Parameters
    ----------
    arrays : sequence
        Indexable result of ``df_to_numpy``: ``arrays[0]`` becomes the feature
        columns, ``arrays[1]`` the task label and ``arrays[2]`` the per-row
        group code.
    label_name : str
        Name of the column that receives the label, cast to int.
    group_name : str, default "race"
        Name of the column that receives the binarized group indicator.
    privileged_code : default 1
        Group code mapped to 1; every other code is mapped to 0.
        NOTE(review): presumably code 1 is "White alone" in the ACS RAC1P
        coding -- confirm against the folktables task definitions.

    Returns
    -------
    pandas.DataFrame
        The feature columns followed by ``label_name`` and ``group_name``,
        both stored as int.
    """
    frame = pd.DataFrame(arrays[0])
    # Assign first, then cast, so the label ends up as a 0/1 int column.
    frame[label_name] = arrays[1]
    frame[label_name] = frame[label_name].astype(int)
    # Collapse the raw group code to a binary indicator (1 == privileged_code).
    frame[group_name] = arrays[2]
    frame[group_name] = (frame[group_name] == privileged_code).astype(int)
    return frame


df_employment = build_task_frame(employment, "employment")
df_income_poverty = build_task_frame(income_poverty, "income_poverty")
df_health_insurance = build_task_frame(health_insurance, "health_insurance")

In [10]:
# Task name -> prepared DataFrame; the keys mirror the entries of `task_types`.
data = {
    "employment": df_employment,
    "income_poverty": df_income_poverty,
    "health_insurance": df_health_insurance,
}

# Randomness Based on Distance to Threshold

In [11]:
# Distance-to-threshold variant: the conformal-prediction flag is off.
random_thresholds = [0, 0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5]
conformal_pred = False

data_scale = 25000
random_seed = 0

exp = Experiment(
    data,
    protected_attribute,
    task_types,
    model_types,
    random_thresholds,
    n_train,
    n_test,
    random_seed,
    data_scale,
    conformal_pred,
)
exp.pretrain_models()

# Run each experiment family in a fixed order and keep one result per call.
results = []
results.extend(
    run_stage()
    for run_stage in (
        exp.experiment_baseline,
        exp.experiment_tasks,
        exp.experiment_models,
        exp.experiment_features,
        exp.experiment_partitions,
        exp.experiment_all,
    )
)

# Stack the per-family results and persist them without the index column.
results = pd.concat(results)
results.to_csv("acs_results_0.csv", index=False)

employment logistic
income_poverty logistic
health_insurance logistic
Running Baseline Experiment
Running Tasks Experiment
Running Models Experiment
Running Features Experiment
employment logistic
income_poverty logistic
health_insurance logistic
Running Data Partitions Experiment
employment logistic
income_poverty logistic
health_insurance logistic
Running All Variations Experiment
employment logistic
income_poverty logistic
health_insurance logistic
Running Models Experiment


# Randomness Based on Conformal Prediction

In [None]:
# Conformal-prediction variant: same pipeline with the conformal flag on.
# NOTE(review): this threshold grid contains 0.35 where the
# distance-to-threshold cell uses 0.15 -- confirm the difference is intended.
random_thresholds = [0, 0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5]
conformal_pred = True

data_scale = 25000
random_seed = 0

exp = Experiment(
    data,
    protected_attribute,
    task_types,
    model_types,
    random_thresholds,
    n_train,
    n_test,
    random_seed,
    data_scale,
    conformal_pred,
)
exp.pretrain_models()

# Run each experiment family in a fixed order and keep one result per call.
results = []
results.extend(
    run_stage()
    for run_stage in (
        exp.experiment_baseline,
        exp.experiment_tasks,
        exp.experiment_models,
        exp.experiment_features,
        exp.experiment_partitions,
        exp.experiment_all,
    )
)

# Stack the per-family results and persist them without the index column.
results = pd.concat(results)
results.to_csv("acs_results_conformal_0.csv", index=False)